diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml index 32e3853417..c0edbf2e00 100644 --- a/nifi-assembly/pom.xml +++ b/nifi-assembly/pom.xml @@ -1036,55 +1036,6 @@ language governing permissions and limitations under the License. --> - - include-hive - - false - - allProfiles - - - - - org.apache.nifi - nifi-hive-nar - 2.0.0-SNAPSHOT - nar - - - org.apache.nifi - nifi-hive-services-api-nar - 2.0.0-SNAPSHOT - nar - - - - - include-hive1_1 - - - false - - allProfiles - - - - - org.apache.nifi - nifi-hive_1_1-nar - 2.0.0-SNAPSHOT - nar - - - org.apache.nifi - nifi-hive-services-api-nar - 2.0.0-SNAPSHOT - nar - - - include-hive3 - - 4.0.0 - - - org.apache.nifi - nifi-hive-bundle - 2.0.0-SNAPSHOT - - - nifi-hive-nar - nar - - true - true - - ${hive.hadoop.version} - - - - - org.apache.nifi - nifi-hive-services-api-nar - 2.0.0-SNAPSHOT - nar - - - org.apache.nifi - nifi-hive-processors - 2.0.0-SNAPSHOT - - - diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/LICENSE deleted file mode 100644 index 78c5afab04..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/LICENSE +++ /dev/null @@ -1,329 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -APACHE NIFI SUBCOMPONENTS: - -The Apache NiFi project contains subcomponents with separate copyright -notices and license terms. Your use of the source code for the these -subcomponents is subject to the terms and conditions of the following -licenses. - The binary distribution of this product bundles 'Bouncy Castle JDK 1.5' - under an MIT style license. - - Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org) - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - - The binary distribution of this product includes modules from Groovy which bundles ANTLR - SOFTWARE RIGHTS - - ANTLR 1989-2006 Developed by Terence Parr - Partially supported by University of San Francisco & jGuru.com - - We reserve no legal rights to the ANTLR--it is fully in the - public domain. An individual or company may do whatever - they wish with source code distributed with ANTLR or the - code generated by ANTLR, including the incorporation of - ANTLR, or its output, into commerical software. - - We encourage users to develop software with ANTLR. However, - we do ask that credit is given to us for developing - ANTLR. By "credit", we mean that if you use ANTLR or - incorporate any source code into one of your programs - (commercial product, research project, or otherwise) that - you acknowledge this fact somewhere in the documentation, - research report, etc... If you like ANTLR and have - developed a nice tool with the output, please mention that - you developed it using ANTLR. In addition, we ask that the - headers remain intact in our source code. As long as these - guidelines are kept, we expect to continue enhancing this - system and expect to make other tools available as they are - completed. - - The primary ANTLR guy: - - Terence Parr - parrt@cs.usfca.edu - parrt@antlr.org - - The binary distribution of this product includes modules from Groovy which bundles ASM - /*** - * http://asm.objectweb.org/ - * - * ASM: a very small and fast Java bytecode manipulation framework - * Copyright (c) 2000-2005 INRIA, France Telecom - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - - - The binary distribution of this product includes modules from Groovy which bundles source from JSR-223 - The following notice applies to the files: - - src/main/org/codehaus/groovy/jsr223/GroovyCompiledScript.java - src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineFactory.java - src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineImpl.java - - - /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: Redistributions of source code - * must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of - * conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. Neither the name of the Sun Microsystems nor the names of - * is contributors may be used to endorse or promote products derived from this software - * without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/NOTICE deleted file mode 100644 index e4d6985fa1..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/NOTICE +++ /dev/null @@ -1,348 +0,0 @@ -nifi-hive-nar -Copyright 2014-2023 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm): - Copyright 2015 The Apache Software Foundation - The derived work is adapted from - org/apache/storm/hive/common/HiveWriter.java - org/apache/storm/hive/common/HiveOptions.java - and can be found in the org.apache.nifi.util.hive package - -This includes derived works from the Apache Hive (ASLv2 licensed) project (https://github.com/apache/hive): - Copyright 2008-2016 The Apache Software Foundation - The derived work is adapted from - release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java - and can be found in the org.apache.hadoop.hive.ql.io.orc package - -=========================================== -Apache Software License v2 -=========================================== - -The following binary components are provided under the Apache Software License v2 - - (ASLv2) Apache Ant - The following NOTICE information applies: - Apache Ant - Copyright 1999-2016 The Apache Software Foundation - - (ASLv2) Apache Commons Codec - The following NOTICE information applies: - Apache Commons Codec - Copyright 2002-2014 The Apache Software Foundation - - src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java - contains test data from http://aspell.net/test/orig/batch0.tab. - Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) - - =============================================================================== - - The content of package org.apache.commons.codec.language.bm has been translated - from the original php source code available at http://stevemorse.org/phoneticinfo.htm - with permission from the original authors. - Original source copyright: - Copyright (c) 2008 Alexander Beider & Stephen P. Morse. - - (ASLv2) Apache Commons DBCP - The following NOTICE information applies: - Apache Commons DBCP - Copyright 2001-2015 The Apache Software Foundation. - - (ASLv2) Apache Commons EL - The following NOTICE information applies: - Apache Commons EL - Copyright 1999-2016 The Apache Software Foundation - - EL-8 patch - Copyright 2004-2007 Jamie Taylor - http://issues.apache.org/jira/browse/EL-8 - - (ASLv2) Apache HttpComponents - The following NOTICE information applies: - Apache HttpComponents Client - Copyright 1999-2016 The Apache Software Foundation - Apache HttpComponents Core - HttpCore - Copyright 2006-2009 The Apache Software Foundation - - (ASLv2) Apache Commons Pool - The following NOTICE information applies: - Apache Commons Pool - Copyright 1999-2009 The Apache Software Foundation. - - (ASLv2) Apache Commons IO - The following NOTICE information applies: - Apache Commons IO - Copyright 2002-2016 The Apache Software Foundation - - (ASLv2) Apache Hive - The following NOTICE information applies: - Apache Hive - Copyright 2008-2015 The Apache Software Foundation - - This product includes software developed by The Apache Software - Foundation (http://www.apache.org/). - - This product includes Jersey (https://jersey.java.net/) - Copyright (c) 2010-2014 Oracle and/or its affiliates. - - This project includes software copyrighted by Microsoft Corporation and - licensed under the Apache License, Version 2.0. - - This project includes software copyrighted by Dell SecureWorks and - licensed under the Apache License, Version 2.0. - - (ASLv2) Jackson JSON processor - The following NOTICE information applies: - # Jackson JSON processor - - Jackson is a high-performance, Free/Open Source JSON processing library. - It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has - been in development since 2007. - It is currently developed by a community of developers, as well as supported - commercially by FasterXML.com. - - ## Licensing - - Jackson core and extension components may licensed under different licenses. - To find the details that apply to this artifact see the accompanying LICENSE file. - For more information, including possible other licensing options, contact - FasterXML.com (http://fasterxml.com). - - ## Credits - - A list of contributors may be found from CREDITS file, which is included - in some artifacts (usually source distributions); but is always available - from the source code management (SCM) system project uses. - - (ASLv2) BoneCP - The following NOTICE information applies: - BoneCP - Copyright 2010 Wallace Wadge - - (ASLv2) Apache Hadoop - The following NOTICE information applies: - The binary distribution of this product bundles binaries of - org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the - following notices: - * Copyright 2011 Dain Sundstrom - * Copyright 2011 FuseSource Corp. http://fusesource.com - - The binary distribution of this product bundles binaries of - org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), - which has the following notices: - * This product includes software developed by FuseSource Corp. - http://fusesource.com - * This product includes software developed at - Progress Software Corporation and/or its subsidiaries or affiliates. - * This product includes software developed by IBM Corporation and others. - - (ASLv2) Apache HBase - The following NOTICE information applies: - Apache HBase - Copyright 2007-2015 The Apache Software Foundation - - -- - This product incorporates portions of the 'Hadoop' project - - Copyright 2007-2009 The Apache Software Foundation - - Licensed under the Apache License v2.0 - -- - Our Orca logo we got here: http://www.vectorfree.com/jumping-orca - It is licensed Creative Commons Attribution 3.0. - See https://creativecommons.org/licenses/by/3.0/us/ - We changed the logo by stripping the colored background, inverting - it and then rotating it some. - - Later we found that vectorfree.com image is not properly licensed. - The original is owned by vectorportal.com. The original was - relicensed so we could use it as Creative Commons Attribution 3.0. - The license is bundled with the download available here: - http://www.vectorportal.com/subcategory/205/KILLER-WHALE-FREE-VECTOR.eps/ifile/9136/detailtest.asp - -- - This product includes portions of the Bootstrap project v3.0.0 - - Copyright 2013 Twitter, Inc. - - Licensed under the Apache License v2.0 - - This product uses the Glyphicons Halflings icon set. - - http://glyphicons.com/ - - Copyright Jan Kovařík - - Licensed under the Apache License v2.0 as a part of the Bootstrap project. - - -- - This product includes portions of the Guava project v14, specifically - 'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java' - - Copyright (C) 2007 The Guava Authors - - Licensed under the Apache License, Version 2.0 - - (ASLv2) Apache Commons Lang - The following NOTICE information applies: - Apache Commons Lang - Copyright 2001-2015 The Apache Software Foundation - - (ASLv2) Apache Curator - The following NOTICE information applies: - Apache Curator - Copyright 2013-2014 The Apache Software Foundation - - (ASLv2) Apache Derby - The following NOTICE information applies: - Apache Derby - Copyright 2004-2014 Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils, - the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation. - - (ASLv2) Apache DS - The following NOTICE information applies: - ApacheDS - Copyright 2003-2015 The Apache Software Foundation - - (ASLv2) Apache Geronimo - The following NOTICE information applies: - Apache Geronimo - Copyright 2003-2008 The Apache Software Foundation - - (ASLv2) HTrace Core - The following NOTICE information applies: - In addition, this product includes software dependencies. See - the accompanying LICENSE.txt for a listing of dependencies - that are NOT Apache licensed (with pointers to their licensing) - - Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin - is a distributed tracing system that is Apache 2.0 Licensed. - Copyright 2012 Twitter, Inc. - - (ASLv2) Jettison - The following NOTICE information applies: - Copyright 2006 Envoi Solutions LLC - - (ASLv2) Jetty - The following NOTICE information applies: - Jetty Web Container - Copyright 1995-2019 Mort Bay Consulting Pty Ltd. - - (ASLv2) Apache log4j - The following NOTICE information applies: - Apache log4j - Copyright 2007 The Apache Software Foundation - - (ASLv2) Parquet MR - The following NOTICE information applies: - Parquet MR - Copyright 2012 Twitter, Inc. - - This project includes code from https://github.com/lemire/JavaFastPFOR - parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java - Apache License Version 2.0 http://www.apache.org/licenses/. - (c) Daniel Lemire, http://lemire.me/en/ - - (ASLv2) Apache Thrift - The following NOTICE information applies: - Apache Thrift - Copyright 2006-2010 The Apache Software Foundation. - - (ASLv2) Apache Twill - The following NOTICE information applies: - Apache Twill - Copyright 2013-2016 The Apache Software Foundation - - (ASLv2) Dropwizard Metrics - The following NOTICE information applies: - Metrics - Copyright 2010-2013 Coda Hale and Yammer, Inc. - - This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, - LongAdder), which was released with the following comments: - - Written by Doug Lea with assistance from members of JCP JSR-166 - Expert Group and released to the public domain, as explained at - http://creativecommons.org/publicdomain/zero/1.0/ - - (ASLv2) Joda Time - The following NOTICE information applies: - This product includes software developed by - Joda.org (http://www.joda.org/). - - (ASLv2) The Netty Project - The following NOTICE information applies: - The Netty Project - Copyright 2011 The Netty Project - - (ASLv2) Apache Tomcat - The following NOTICE information applies: - Apache Tomcat - Copyright 2007 The Apache Software Foundation - - Java Management Extensions (JMX) support is provided by - the MX4J package, which is open source software. The - original software and related information is available - at http://mx4j.sourceforge.net. - - Java compilation software for JSP pages is provided by Eclipse, - which is open source software. The orginal software and - related infomation is available at - http://www.eclipse.org. - - (ASLv2) Apache ZooKeeper - The following NOTICE information applies: - Apache ZooKeeper - Copyright 2009-2012 The Apache Software Foundation - - (ASLv2) Google GSON - The following NOTICE information applies: - Copyright 2008 Google Inc. - - (ASLv2) Groovy (org.codehaus.groovy:groovy-all:jar:2.1.6 - http://www.groovy-lang.org) - The following NOTICE information applies: - Groovy Language - Copyright 2003-2012 The respective authors and developers - Developers and Contributors are listed in the project POM file - and Gradle build file - - This product includes software developed by - The Groovy community (http://groovy.codehaus.org/). - - (ASLv2) JPam - The following NOTICE information applies: - Copyright 2003-2006 Greg Luck - - ************************ - Common Development and Distribution License 1.1 - ************************ - - The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details. - - (CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net) - (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail) - - - ************************ - Common Development and Distribution License 1.0 - ************************ - - The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details. - - (CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available) - (CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided) - (CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp) - (CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net) - - ***************** - Public Domain - ***************** - - The following binary components are provided to the 'Public Domain'. See project link for details. - - (Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/) diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml deleted file mode 100644 index 0251a5432d..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/pom.xml +++ /dev/null @@ -1,225 +0,0 @@ - - - - 4.0.0 - - - org.apache.nifi - nifi-hive-bundle - 2.0.0-SNAPSHOT - - - nifi-hive-processors - jar - - - - ${hive12.hadoop.version} - - - - - org.apache.nifi - nifi-api - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-put-pattern - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-security-kerberos - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-dbcp-service-api - 2.0.0-SNAPSHOT - provided - - - org.apache.nifi - nifi-hive-services-api - 2.0.0-SNAPSHOT - provided - - - org.apache.nifi - nifi-kerberos-credentials-service-api - provided - - - org.apache.avro - avro - - - org.apache.hive - hive-jdbc - ${hive12.version} - - - org.json - json - - - log4j - log4j - - - log4j - apache-log4j-extras - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hive.hcatalog - hive-hcatalog-streaming - ${hive12.version} - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - log4j - apache-log4j-extras - - - commons-logging - commons-logging - - - - - - org.codehaus.groovy - groovy-all - 2.4.21 - - - org.apache.hive.hcatalog - hive-hcatalog-core - ${hive12.version} - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - - - com.google.code.findbugs - jsr305 - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - commons-logging - commons-logging - - - - - org.apache.nifi - nifi-hadoop-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-hadoop-record-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-record-serialization-service-api - - - org.apache.nifi - nifi-record - - - com.github.stephenc.findbugs - findbugs-annotations - 1.3.9-1 - - - org.apache.commons - commons-text - - - org.apache.commons - commons-dbcp2 - - - org.slf4j - log4j-over-slf4j - - - org.slf4j - jcl-over-slf4j - - - org.apache.nifi - nifi-mock - 2.0.0-SNAPSHOT - test - - - org.apache.nifi - nifi-mock-record-utils - 2.0.0-SNAPSHOT - test - - - org.xerial.snappy - snappy-java - - - diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java deleted file mode 100644 index d6566ec4c8..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/NiFiOrcUtils.java +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.util.Utf8; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.nifi.serialization.record.DataType; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordFieldType; -import org.apache.nifi.serialization.record.type.ArrayDataType; -import org.apache.nifi.serialization.record.type.ChoiceDataType; -import org.apache.nifi.serialization.record.type.MapDataType; -import org.apache.nifi.serialization.record.type.RecordDataType; - -import java.io.IOException; -import java.io.OutputStream; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT; - -/** - * Utility methods for ORC support (conversion from Avro, conversion to Hive types, e.g. - */ -public class NiFiOrcUtils { - - public static Object convertToORCObject(TypeInfo typeInfo, Object o) { - if (o != null) { - if (typeInfo instanceof UnionTypeInfo) { - OrcUnion union = new OrcUnion(); - // Avro uses Utf8 and GenericData.EnumSymbol objects instead of Strings. This is handled in other places in the method, but here - // we need to determine the union types from the objects, so choose String.class if the object is one of those Avro classes - Class clazzToCompareTo = o.getClass(); - if (o instanceof org.apache.avro.util.Utf8 || o instanceof GenericData.EnumSymbol) { - clazzToCompareTo = String.class; - } - // Need to find which of the union types correspond to the primitive object - TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector( - ObjectInspectorFactory.getReflectionObjectInspector(clazzToCompareTo, ObjectInspectorFactory.ObjectInspectorOptions.JAVA)); - List unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos(); - - int index = 0; - while (index < unionTypeInfos.size() && !unionTypeInfos.get(index).equals(objectTypeInfo)) { - index++; - } - if (index < unionTypeInfos.size()) { - union.set((byte) index, convertToORCObject(objectTypeInfo, o)); - } else { - throw new IllegalArgumentException("Object Type for class " + o.getClass().getName() + " not in Union declaration"); - } - return union; - } - if (o instanceof Integer) { - return new IntWritable((int) o); - } - if (o instanceof Boolean) { - return new BooleanWritable((boolean) o); - } - if (o instanceof Long) { - return new LongWritable((long) o); - } - if (o instanceof Float) { - return new FloatWritable((float) o); - } - if (o instanceof Double) { - return new DoubleWritable((double) o); - } - if (o instanceof BigDecimal) { - return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) o)); - } - if (o instanceof String || o instanceof Utf8 || o instanceof GenericData.EnumSymbol) { - return new Text(o.toString()); - } - if (o instanceof ByteBuffer && typeInfo instanceof DecimalTypeInfo) { - ByteBuffer buffer = (ByteBuffer) o; - return new HiveDecimalWritable(buffer.array(), ((DecimalTypeInfo) typeInfo).scale()); - } - if (o instanceof ByteBuffer) { - return new BytesWritable(((ByteBuffer) o).array()); - } - if (o instanceof int[]) { - int[] intArray = (int[]) o; - return Arrays.stream(intArray) - .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("int"), element)) - .collect(Collectors.toList()); - } - if (o instanceof long[]) { - long[] longArray = (long[]) o; - return Arrays.stream(longArray) - .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("bigint"), element)) - .collect(Collectors.toList()); - } - if (o instanceof float[]) { - float[] floatArray = (float[]) o; - return IntStream.range(0, floatArray.length) - .mapToDouble(i -> floatArray[i]) - .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("float"), (float) element)) - .collect(Collectors.toList()); - } - if (o instanceof double[]) { - double[] doubleArray = (double[]) o; - return Arrays.stream(doubleArray) - .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("double"), element)) - .collect(Collectors.toList()); - } - if (o instanceof boolean[]) { - boolean[] booleanArray = (boolean[]) o; - return IntStream.range(0, booleanArray.length) - .map(i -> booleanArray[i] ? 1 : 0) - .mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("boolean"), element == 1)) - .collect(Collectors.toList()); - } - if (o instanceof GenericData.Array) { - GenericData.Array array = ((GenericData.Array) o); - // The type information in this case is interpreted as a List - TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo(); - return array.stream().map((element) -> convertToORCObject(listTypeInfo, element)).collect(Collectors.toList()); - } - if (o instanceof List) { - return o; - } - if (o instanceof Map) { - Map map = new HashMap(); - TypeInfo keyInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo(); - TypeInfo valueInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo(); - // Unions are not allowed as key/value types, so if we convert the key and value objects, - // they should return Writable objects - ((Map) o).forEach((key, value) -> { - Object keyObject = convertToORCObject(keyInfo, key); - Object valueObject = convertToORCObject(valueInfo, value); - if (keyObject == null) { - throw new IllegalArgumentException("Maps' key cannot be null"); - } - map.put(keyObject, valueObject); - }); - return map; - } - if (o instanceof GenericData.Record) { - GenericData.Record record = (GenericData.Record) o; - TypeInfo recordSchema = NiFiOrcUtils.getOrcField(record.getSchema()); - List recordFields = record.getSchema().getFields(); - if (recordFields != null) { - Object[] fieldObjects = new Object[recordFields.size()]; - for (int i = 0; i < recordFields.size(); i++) { - Schema.Field field = recordFields.get(i); - Schema fieldSchema = field.schema(); - Object fieldObject = record.get(field.name()); - fieldObjects[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), fieldObject); - } - return NiFiOrcUtils.createOrcStruct(recordSchema, fieldObjects); - } - } - throw new IllegalArgumentException("Error converting object of type " + o.getClass().getName() + " to ORC type " + typeInfo.getTypeName()); - } else { - return null; - } - } - - - /** - * Create an object of OrcStruct given a TypeInfo and a list of objects - * - * @param typeInfo The TypeInfo object representing the ORC record schema - * @param objs ORC objects/Writables - * @return an OrcStruct containing the specified objects for the specified schema - */ - public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) { - SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct - .createObjectInspector(typeInfo); - List fields = (List) oi.getAllStructFieldRefs(); - OrcStruct result = (OrcStruct) oi.create(); - result.setNumFields(fields.size()); - for (int i = 0; i < fields.size(); i++) { - oi.setStructFieldData(result, fields.get(i), objs[i]); - } - return result; - } - - public static String normalizeHiveTableName(String name) { - return name.replaceAll("[\\. ]", "_"); - } - - public static String generateHiveDDL(Schema avroSchema, String tableName) { - Schema.Type schemaType = avroSchema.getType(); - StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS "); - sb.append(tableName); - sb.append(" ("); - if (Schema.Type.RECORD.equals(schemaType)) { - List hiveColumns = new ArrayList<>(); - List fields = avroSchema.getFields(); - if (fields != null) { - hiveColumns.addAll( - fields.stream().map(field -> field.name() + " " + getHiveTypeFromAvroType(field.schema())).collect(Collectors.toList())); - } - sb.append(StringUtils.join(hiveColumns, ", ")); - sb.append(") STORED AS ORC"); - return sb.toString(); - } else { - throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD"); - } - } - - - public static TypeInfo getOrcField(Schema fieldSchema) throws IllegalArgumentException { - Schema.Type fieldType = fieldSchema.getType(); - - switch (fieldType) { - case INT: - case LONG: - case BOOLEAN: - case DOUBLE: - case FLOAT: - case STRING: - case NULL: - return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType); - case BYTES: - if (isLogicalType(fieldSchema)){ - return getLogicalTypeInfo(fieldSchema); - } else { - return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType); - } - case UNION: - List unionFieldSchemas = fieldSchema.getTypes(); - - if (unionFieldSchemas != null) { - // Ignore null types in union - List orcFields = unionFieldSchemas.stream().filter( - unionFieldSchema -> !Schema.Type.NULL.equals(unionFieldSchema.getType())) - .map(NiFiOrcUtils::getOrcField) - .collect(Collectors.toList()); - - // Flatten the field if the union only has one non-null element - if (orcFields.size() == 1) { - return orcFields.get(0); - } else { - return TypeInfoFactory.getUnionTypeInfo(orcFields); - } - } - return null; - - case ARRAY: - return TypeInfoFactory.getListTypeInfo(getOrcField(fieldSchema.getElementType())); - - case MAP: - return TypeInfoFactory.getMapTypeInfo( - getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING), - getOrcField(fieldSchema.getValueType())); - - case RECORD: - List avroFields = fieldSchema.getFields(); - if (avroFields != null) { - List orcFieldNames = new ArrayList<>(avroFields.size()); - List orcFields = new ArrayList<>(avroFields.size()); - avroFields.forEach(avroField -> { - String fieldName = avroField.name(); - orcFieldNames.add(fieldName); - orcFields.add(getOrcField(avroField.schema())); - }); - return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields); - } - return null; - - case ENUM: - // An enum value is just a String for ORC/Hive - return getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING); - - default: - throw new IllegalArgumentException("Did not recognize Avro type " + fieldType.getName()); - } - - } - - private static boolean isLogicalType(Schema schema){ - return schema.getProp("logicalType") != null; - } - - private static TypeInfo getLogicalTypeInfo(Schema schema){ - String type = schema.getProp("logicalType"); - switch (type){ - case "decimal": - int precision = schema.getObjectProp("precision") != null - ? Integer.valueOf(schema.getObjectProp("precision").toString()) - : 10; - int scale = schema.getObjectProp("scale") != null - ? Integer.valueOf(schema.getObjectProp("scale").toString()) - : 2; - return new DecimalTypeInfo(precision, scale); - } - throw new IllegalArgumentException("Logical type " + type + " is not supported!"); - } - - public static Schema.Type getAvroSchemaTypeOfObject(Object o) { - if (o == null) { - return Schema.Type.NULL; - } else if (o instanceof Integer) { - return Schema.Type.INT; - } else if (o instanceof Long) { - return Schema.Type.LONG; - } else if (o instanceof Boolean) { - return Schema.Type.BOOLEAN; - } else if (o instanceof byte[]) { - return Schema.Type.BYTES; - } else if (o instanceof Float) { - return Schema.Type.FLOAT; - } else if (o instanceof Double) { - return Schema.Type.DOUBLE; - } else if (o instanceof Enum) { - return Schema.Type.ENUM; - } else if (o instanceof Object[]) { - return Schema.Type.ARRAY; - } else if (o instanceof List) { - return Schema.Type.ARRAY; - } else if (o instanceof Map) { - return Schema.Type.MAP; - } else { - throw new IllegalArgumentException("Object of class " + o.getClass() + " is not a supported Avro Type"); - } - } - - public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException { - if (avroType == null) { - throw new IllegalArgumentException("Avro type is null"); - } - switch (avroType) { - case INT: - return TypeInfoFactory.getPrimitiveTypeInfo("int"); - case LONG: - return TypeInfoFactory.getPrimitiveTypeInfo("bigint"); - case BOOLEAN: - case NULL: // ORC has no null type, so just pick the smallest. All values are necessarily null. - return TypeInfoFactory.getPrimitiveTypeInfo("boolean"); - case BYTES: - return TypeInfoFactory.getPrimitiveTypeInfo("binary"); - case DOUBLE: - return TypeInfoFactory.getPrimitiveTypeInfo("double"); - case FLOAT: - return TypeInfoFactory.getPrimitiveTypeInfo("float"); - case STRING: - return TypeInfoFactory.getPrimitiveTypeInfo("string"); - default: - throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type"); - } - } - - public static String getHiveTypeFromAvroType(Schema avroSchema) { - if (avroSchema == null) { - throw new IllegalArgumentException("Avro schema is null"); - } - - Schema.Type avroType = avroSchema.getType(); - - switch (avroType) { - case INT: - return "INT"; - case LONG: - return "BIGINT"; - case BOOLEAN: - case NULL: // Hive has no null type, we picked boolean as the ORC type so use it for Hive DDL too. All values are necessarily null. - return "BOOLEAN"; - case BYTES: - if (isLogicalType(avroSchema)){ - return getLogicalTypeInfo(avroSchema).toString().toUpperCase(); - } else { - return "BINARY"; - } - case DOUBLE: - return "DOUBLE"; - case FLOAT: - return "FLOAT"; - case STRING: - case ENUM: - return "STRING"; - case UNION: - List unionFieldSchemas = avroSchema.getTypes(); - if (unionFieldSchemas != null) { - List hiveFields = new ArrayList<>(); - for (Schema unionFieldSchema : unionFieldSchemas) { - Schema.Type unionFieldSchemaType = unionFieldSchema.getType(); - // Ignore null types in union - if (!Schema.Type.NULL.equals(unionFieldSchemaType)) { - hiveFields.add(getHiveTypeFromAvroType(unionFieldSchema)); - } - } - // Flatten the field if the union only has one non-null element - return (hiveFields.size() == 1) - ? hiveFields.get(0) - : "UNIONTYPE<" + StringUtils.join(hiveFields, ", ") + ">"; - - } - break; - case MAP: - return "MAP"; - case ARRAY: - return "ARRAY<" + getHiveTypeFromAvroType(avroSchema.getElementType()) + ">"; - case RECORD: - List recordFields = avroSchema.getFields(); - if (recordFields != null) { - List hiveFields = recordFields.stream().map( - recordField -> recordField.name() + ":" + getHiveTypeFromAvroType(recordField.schema())).collect(Collectors.toList()); - return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">"; - } - break; - default: - break; - } - - throw new IllegalArgumentException("Error converting Avro type " + avroType.getName() + " to Hive type"); - } - - public static String getHiveTypeFromFieldType(DataType rawDataType, boolean hiveFieldNames) { - if (rawDataType == null) { - throw new IllegalArgumentException("Field type is null"); - } - RecordFieldType dataType = rawDataType.getFieldType(); - - if (RecordFieldType.INT.equals(dataType)) { - return "INT"; - } - if (RecordFieldType.LONG.equals(dataType)) { - return "BIGINT"; - } - if (RecordFieldType.BOOLEAN.equals(dataType)) { - return "BOOLEAN"; - } - if (RecordFieldType.DOUBLE.equals(dataType)) { - return "DOUBLE"; - } - if (RecordFieldType.FLOAT.equals(dataType)) { - return "FLOAT"; - } - if (RecordFieldType.DECIMAL.equals(dataType)) { - return "DECIMAL"; - } - if (RecordFieldType.STRING.equals(dataType) || RecordFieldType.ENUM.equals(dataType)) { - return "STRING"; - } - if (RecordFieldType.DATE.equals(dataType)) { - return "DATE"; - } - if (RecordFieldType.TIME.equals(dataType)) { - return "INT"; - } - if (RecordFieldType.TIMESTAMP.equals(dataType)) { - return "TIMESTAMP"; - } - if (RecordFieldType.ARRAY.equals(dataType)) { - ArrayDataType arrayDataType = (ArrayDataType) rawDataType; - if (RecordFieldType.BYTE.getDataType().equals(arrayDataType.getElementType())) { - return "BINARY"; - } - return "ARRAY<" + getHiveTypeFromFieldType(arrayDataType.getElementType(), hiveFieldNames) + ">"; - } - if (RecordFieldType.MAP.equals(dataType)) { - MapDataType mapDataType = (MapDataType) rawDataType; - return "MAP"; - } - if (RecordFieldType.CHOICE.equals(dataType)) { - ChoiceDataType choiceDataType = (ChoiceDataType) rawDataType; - List unionFieldSchemas = choiceDataType.getPossibleSubTypes(); - - if (unionFieldSchemas != null) { - // Ignore null types in union - List hiveFields = unionFieldSchemas.stream() - .map((it) -> getHiveTypeFromFieldType(it, hiveFieldNames)) - .collect(Collectors.toList()); - - // Flatten the field if the union only has one non-null element - return (hiveFields.size() == 1) - ? hiveFields.get(0) - : "UNIONTYPE<" + StringUtils.join(hiveFields, ", ") + ">"; - } - return null; - } - - if (RecordFieldType.RECORD.equals(dataType)) { - RecordDataType recordDataType = (RecordDataType) rawDataType; - List recordFields = recordDataType.getChildSchema().getFields(); - if (recordFields != null) { - List hiveFields = recordFields.stream().map( - recordField -> ("`" + (hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName()) + "`:" - + getHiveTypeFromFieldType(recordField.getDataType(), hiveFieldNames))).collect(Collectors.toList()); - return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">"; - } - return null; - } - - throw new IllegalArgumentException("Error converting Avro type " + dataType.name() + " to Hive type"); - } - - - public static OrcFlowFileWriter createWriter(OutputStream flowFileOutputStream, - Path path, - Configuration conf, - TypeInfo orcSchema, - long stripeSize, - CompressionKind compress, - int bufferSize) throws IOException { - - int rowIndexStride = HiveConf.getIntVar(conf, HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE); - - boolean addBlockPadding = HiveConf.getBoolVar(conf, HIVE_ORC_DEFAULT_BLOCK_PADDING); - - String versionName = HiveConf.getVar(conf, HIVE_ORC_WRITE_FORMAT); - OrcFile.Version versionValue = (versionName == null) - ? OrcFile.Version.CURRENT - : OrcFile.Version.byName(versionName); - - OrcFile.EncodingStrategy encodingStrategy; - String enString = conf.get(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname); - if (enString == null) { - encodingStrategy = OrcFile.EncodingStrategy.SPEED; - } else { - encodingStrategy = OrcFile.EncodingStrategy.valueOf(enString); - } - - OrcFile.CompressionStrategy compressionStrategy; - String compString = conf.get(HiveConf.ConfVars.HIVE_ORC_COMPRESSION_STRATEGY.varname); - if (compString == null) { - compressionStrategy = OrcFile.CompressionStrategy.SPEED; - } else { - compressionStrategy = OrcFile.CompressionStrategy.valueOf(compString); - } - - float paddingTolerance; - paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname, - HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal); - - long blockSizeValue = HiveConf.getLongVar(conf, HIVE_ORC_DEFAULT_BLOCK_SIZE); - - double bloomFilterFpp = BloomFilterIO.DEFAULT_FPP; - - ObjectInspector inspector = OrcStruct.createObjectInspector(orcSchema); - - return new OrcFlowFileWriter(flowFileOutputStream, - path, - conf, - inspector, - stripeSize, - compress, - bufferSize, - rowIndexStride, - getMemoryManager(conf), - addBlockPadding, - versionValue, - null, // no callback - encodingStrategy, - compressionStrategy, - paddingTolerance, - blockSizeValue, - null, // no Bloom Filter column names - bloomFilterFpp); - } - - private static MemoryManager memoryManager = null; - - private static synchronized MemoryManager getMemoryManager(Configuration conf) { - if (memoryManager == null) { - memoryManager = new MemoryManager(conf); - } - return memoryManager; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcFlowFileWriter.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcFlowFileWriter.java deleted file mode 100644 index c868b91a52..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcFlowFileWriter.java +++ /dev/null @@ -1,2649 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import com.google.common.primitives.Longs; -import com.google.protobuf.ByteString; -import com.google.protobuf.CodedOutputStream; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.io.IOConstants; -import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO; -import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier; -import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy; -import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy; -import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry; -import org.apache.hadoop.hive.ql.io.orc.OrcProto.StripeStatistics; -import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; -import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem; -import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.nifi.stream.io.ByteCountingOutputStream; - -import java.io.IOException; -import java.io.OutputStream; -import java.lang.management.ManagementFactory; -import java.nio.ByteBuffer; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * An ORC file writer. The file is divided into stripes, which is the natural - * unit of work when reading. Each stripe is buffered in memory until the - * memory reaches the stripe size and then it is written out broken down by - * columns. Each column is written by a TreeWriter that is specific to that - * type of column. TreeWriters may have children TreeWriters that handle the - * sub-types. Each of the TreeWriters writes the column's data as a set of - * streams. - *

- * This class is synchronized so that multi-threaded access is ok. In - * particular, because the MemoryManager is shared between writers, this class - * assumes that checkMemory may be called from a separate thread. - */ -public class OrcFlowFileWriter implements Writer, MemoryManager.Callback { - - private static final Log LOG = LogFactory.getLog(WriterImpl.class); - - private static final int HDFS_BUFFER_SIZE = 256 * 1024; - private static final int MIN_ROW_INDEX_STRIDE = 1000; - - // threshold above which buffer size will be automatically resized - private static final int COLUMN_COUNT_THRESHOLD = 1000; - - private final Path path; - private final long defaultStripeSize; - private long adjustedStripeSize; - private final int rowIndexStride; - private final CompressionKind compress; - private final CompressionCodec codec; - private final boolean addBlockPadding; - private final int bufferSize; - private final long blockSize; - private final float paddingTolerance; - // the streams that make up the current stripe - private final Map streams = new TreeMap<>(); - - private final OutputStream flowFileOutputStream; - - private ByteCountingOutputStream rawWriter = null; - // the compressed metadata information outStream - private OutStream writer = null; - // a protobuf outStream around streamFactory - private CodedOutputStream protobufWriter = null; - private long headerLength; - private int columnCount; - private long rowCount = 0; - private long rowsInStripe = 0; - private long rawDataSize = 0; - private int rowsInIndex = 0; - private int stripesAtLastFlush = -1; - private final List stripes = new ArrayList<>(); - private final Map userMetadata = new TreeMap<>(); - private final StreamFactory streamFactory = new StreamFactory(); - private final TreeWriter treeWriter; - private final boolean buildIndex; - private final MemoryManager memoryManager; - private final OrcFile.Version version; - private final Configuration conf; - private final OrcFile.WriterCallback callback; - private final OrcFile.WriterContext callbackContext; - private final OrcFile.EncodingStrategy encodingStrategy; - private final OrcFile.CompressionStrategy compressionStrategy; - private final boolean[] bloomFilterColumns; - private final double bloomFilterFpp; - - - public OrcFlowFileWriter(OutputStream flowFileOutputStream, - Path path, - Configuration conf, - ObjectInspector inspector, - long stripeSize, - CompressionKind compress, - int bufferSize, - int rowIndexStride, - MemoryManager memoryManager, - boolean addBlockPadding, - OrcFile.Version version, - OrcFile.WriterCallback callback, - EncodingStrategy encodingStrategy, - CompressionStrategy compressionStrategy, - float paddingTolerance, - long blockSizeValue, - String bloomFilterColumnNames, - double bloomFilterFpp) throws IOException { - this.flowFileOutputStream = flowFileOutputStream; - this.path = path; - this.conf = conf; - this.callback = callback; - callbackContext = (callback != null) ? () -> OrcFlowFileWriter.this : null; - this.adjustedStripeSize = stripeSize; - this.defaultStripeSize = stripeSize; - this.version = version; - this.encodingStrategy = encodingStrategy; - this.compressionStrategy = compressionStrategy; - this.addBlockPadding = addBlockPadding; - this.blockSize = blockSizeValue; - this.paddingTolerance = paddingTolerance; - this.compress = compress; - this.rowIndexStride = rowIndexStride; - this.memoryManager = memoryManager; - buildIndex = rowIndexStride > 0; - codec = createCodec(compress); - String allColumns = conf.get(IOConstants.COLUMNS); - if (allColumns == null) { - allColumns = getColumnNamesFromInspector(inspector); - } - this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize); - if (version == OrcFile.Version.V_0_11) { - /* do not write bloom filters for ORC v11 */ - this.bloomFilterColumns = - OrcUtils.includeColumns(null, allColumns, inspector); - } else { - this.bloomFilterColumns = - OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector); - } - this.bloomFilterFpp = bloomFilterFpp; - treeWriter = createTreeWriter(inspector, streamFactory, false); - if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) { - throw new IllegalArgumentException("Row stride must be at least " + - MIN_ROW_INDEX_STRIDE); - } - - // ensure that we are able to handle callbacks before we register ourselves - memoryManager.addWriter(path, stripeSize, this); - } - - private String getColumnNamesFromInspector(ObjectInspector inspector) { - List fieldNames = Lists.newArrayList(); - Joiner joiner = Joiner.on(","); - if (inspector instanceof StructObjectInspector) { - StructObjectInspector soi = (StructObjectInspector) inspector; - List fields = soi.getAllStructFieldRefs(); - fieldNames.addAll(fields.stream().map((Function) StructField::getFieldName).collect(Collectors.toList())); - } - return joiner.join(fieldNames); - } - - @VisibleForTesting - int getEstimatedBufferSize(int bs) { - return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs); - } - - int getEstimatedBufferSize(String colNames, int bs) { - long availableMem = getMemoryAvailableForORC(); - if (colNames != null) { - final int numCols = colNames.split(",").length; - if (numCols > COLUMN_COUNT_THRESHOLD) { - // In BufferedStream, there are 3 outstream buffers (compressed, - // uncompressed and overflow) and list of previously compressed buffers. - // Since overflow buffer is rarely used, lets consider only 2 allocation. - // Also, initially, the list of compression buffers will be empty. - final int outStreamBuffers = codec == null ? 1 : 2; - - // max possible streams per column is 5. For string columns, there is - // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams. - final int maxStreams = 5; - - // Lets assume 10% memory for holding dictionary in memory and other - // object allocations - final long miscAllocation = (long) (0.1f * availableMem); - - // compute the available memory - final long remainingMem = availableMem - miscAllocation; - - int estBufferSize = (int) (remainingMem / (maxStreams * outStreamBuffers * numCols)); - estBufferSize = getClosestBufferSize(estBufferSize, bs); - if (estBufferSize > bs) { - estBufferSize = bs; - } - - LOG.info("WIDE TABLE - Number of columns: " + numCols + " Chosen compression buffer size: " + estBufferSize); - return estBufferSize; - } - } - return bs; - } - - private int getClosestBufferSize(int estBufferSize, int bs) { - final int kb4 = 4 * 1024; - final int kb8 = 8 * 1024; - final int kb16 = 16 * 1024; - final int kb32 = 32 * 1024; - final int kb64 = 64 * 1024; - final int kb128 = 128 * 1024; - final int kb256 = 256 * 1024; - if (estBufferSize <= kb4) { - return kb4; - } else if (estBufferSize > kb4 && estBufferSize <= kb8) { - return kb8; - } else if (estBufferSize > kb8 && estBufferSize <= kb16) { - return kb16; - } else if (estBufferSize > kb16 && estBufferSize <= kb32) { - return kb32; - } else if (estBufferSize > kb32 && estBufferSize <= kb64) { - return kb64; - } else if (estBufferSize > kb64 && estBufferSize <= kb128) { - return kb128; - } else { - return kb256; - } - } - - // the assumption is only one ORC writer open at a time, which holds true for - // most of the cases. HIVE-6455 forces single writer case. - private long getMemoryAvailableForORC() { - HiveConf.ConfVars poolVar = HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL; - double maxLoad = conf.getFloat(poolVar.varname, poolVar.defaultFloatVal); - long totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() * maxLoad); - return totalMemoryPool; - } - - public static CompressionCodec createCodec(CompressionKind kind) { - switch (kind) { - case NONE: - return null; - case ZLIB: - return new ZlibCodec(); - case SNAPPY: - return new SnappyCodec(); - case LZO: - try { - Class lzo = - (Class) - JavaUtils.loadClass("org.apache.hadoop.hive.ql.io.orc.LzoCodec"); - return lzo.newInstance(); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("LZO is not available.", e); - } catch (InstantiationException e) { - throw new IllegalArgumentException("Problem initializing LZO", e); - } catch (IllegalAccessException e) { - throw new IllegalArgumentException("Insufficient access to LZO", e); - } - default: - throw new IllegalArgumentException("Unknown compression codec: " + - kind); - } - } - - @Override - public synchronized boolean checkMemory(double newScale) throws IOException { - long limit = Math.round(adjustedStripeSize * newScale); - long size = estimateStripeSize(); - if (LOG.isDebugEnabled()) { - LOG.debug("ORC writer " + path + " size = " + size + " limit = " + - limit); - } - if (size > limit) { - flushStripe(); - return true; - } - return false; - } - - /** - * This class is used to hold the contents of streams as they are buffered. - * The TreeWriters write to the outStream and the codec compresses the - * data as buffers fill up and stores them in the output list. When the - * stripe is being written, the whole stream is written to the file. - */ - private class BufferedStream implements OutStream.OutputReceiver { - private final OutStream outStream; - private final List output = new ArrayList(); - - BufferedStream(String name, int bufferSize, - CompressionCodec codec) throws IOException { - outStream = new OutStream(name, bufferSize, codec, this); - } - - /** - * Receive a buffer from the compression codec. - * - * @param buffer the buffer to save - * @throws IOException if an error occurs while receiving a buffer - */ - @Override - public void output(ByteBuffer buffer) { - output.add(buffer); - } - - /** - * Get the number of bytes in buffers that are allocated to this stream. - * - * @return number of bytes in buffers - */ - public long getBufferSize() { - long result = 0; - for (ByteBuffer buf : output) { - result += buf.capacity(); - } - return outStream.getBufferSize() + result; - } - - /** - * Flush the stream to the codec. - * - * @throws IOException if an error occurs while flushing the stream - */ - public void flush() throws IOException { - outStream.flush(); - } - - /** - * Clear all of the buffers. - * - * @throws IOException if an error occurs while clearing the buffers - */ - public void clear() throws IOException { - outStream.clear(); - output.clear(); - } - - /** - * Check the state of suppress flag in output stream - * - * @return value of suppress flag - */ - public boolean isSuppressed() { - return outStream.isSuppressed(); - } - - /** - * Get the number of bytes that will be written to the output. Assumes - * the stream has already been flushed. - * - * @return the number of bytes - */ - public long getOutputSize() { - long result = 0; - for (ByteBuffer buffer : output) { - result += buffer.remaining(); - } - return result; - } - - /** - * Write the saved compressed buffers to the OutputStream. - * - * @param out the stream to write to - * @throws IOException if an error occurs during write - */ - void spillTo(OutputStream out) throws IOException { - for (ByteBuffer buffer : output) { - out.write(buffer.array(), buffer.arrayOffset() + buffer.position(), - buffer.remaining()); - } - } - - @Override - public String toString() { - return outStream.toString(); - } - } - - /** - * An output receiver that writes the ByteBuffers to the output stream - * as they are received. - */ - private class DirectStream implements OutStream.OutputReceiver { - private final OutputStream output; - - DirectStream(OutputStream output) { - this.output = output; - } - - @Override - public void output(ByteBuffer buffer) throws IOException { - output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), - buffer.remaining()); - } - } - - private static class RowIndexPositionRecorder implements PositionRecorder { - private final OrcProto.RowIndexEntry.Builder builder; - - RowIndexPositionRecorder(OrcProto.RowIndexEntry.Builder builder) { - this.builder = builder; - } - - @Override - public void addPosition(long position) { - builder.addPositions(position); - } - } - - /** - * Interface from the Writer to the TreeWriters. This limits the visibility - * that the TreeWriters have into the Writer. - */ - private class StreamFactory { - /** - * Create a stream to store part of a column. - * - * @param column the column id for the stream - * @param kind the kind of stream - * @return The output outStream that the section needs to be written to. - * @throws IOException if an error occurs while creating the stream - */ - public OutStream createStream(int column, - OrcProto.Stream.Kind kind - ) throws IOException { - final StreamName name = new StreamName(column, kind); - final EnumSet modifiers; - - switch (kind) { - case BLOOM_FILTER: - case DATA: - case DICTIONARY_DATA: - if (getCompressionStrategy() == CompressionStrategy.SPEED) { - modifiers = EnumSet.of(Modifier.FAST, Modifier.TEXT); - } else { - modifiers = EnumSet.of(Modifier.DEFAULT, Modifier.TEXT); - } - break; - case LENGTH: - case DICTIONARY_COUNT: - case PRESENT: - case ROW_INDEX: - case SECONDARY: - // easily compressed using the fastest modes - modifiers = EnumSet.of(Modifier.FASTEST, Modifier.BINARY); - break; - default: - LOG.warn("Missing ORC compression modifiers for " + kind); - modifiers = null; - break; - } - - BufferedStream result = streams.get(name); - if (result == null) { - result = new BufferedStream(name.toString(), bufferSize, - codec == null ? codec : codec.modify(modifiers)); - streams.put(name, result); - } - return result.outStream; - } - - /** - * Get the next column id. - * - * @return a number from 0 to the number of columns - 1 - */ - public int getNextColumnId() { - return columnCount++; - } - - /** - * Get the current column id. After creating all tree writers this count should tell how many - * columns (including columns within nested complex objects) are created in total. - * - * @return current column id - */ - public int getCurrentColumnId() { - return columnCount; - } - - /** - * Get the stride rate of the row index. - */ - public int getRowIndexStride() { - return rowIndexStride; - } - - /** - * Should be building the row index. - * - * @return true if we are building the index - */ - public boolean buildIndex() { - return buildIndex; - } - - /** - * Is the ORC file compressed? - * - * @return are the streams compressed - */ - public boolean isCompressed() { - return codec != null; - } - - /** - * Get the encoding strategy to use. - * - * @return encoding strategy - */ - public EncodingStrategy getEncodingStrategy() { - return encodingStrategy; - } - - /** - * Get the compression strategy to use. - * - * @return compression strategy - */ - public CompressionStrategy getCompressionStrategy() { - return compressionStrategy; - } - - /** - * Get the bloom filter columns - * - * @return bloom filter columns - */ - public boolean[] getBloomFilterColumns() { - return bloomFilterColumns; - } - - /** - * Get bloom filter false positive percentage. - * - * @return fpp - */ - public double getBloomFilterFPP() { - return bloomFilterFpp; - } - - /** - * Get the writer's configuration. - * - * @return configuration - */ - public Configuration getConfiguration() { - return conf; - } - - /** - * Get the version of the file to write. - */ - public OrcFile.Version getVersion() { - return version; - } - } - - /** - * The parent class of all of the writers for each column. Each column - * is written by an instance of this class. The compound types (struct, - * list, map, and union) have children tree writers that write the children - * types. - */ - private abstract static class TreeWriter { - protected final int id; - protected final ObjectInspector inspector; - private final BitFieldWriter isPresent; - private final boolean isCompressed; - protected final ColumnStatisticsImpl indexStatistics; - protected final ColumnStatisticsImpl stripeColStatistics; - private final ColumnStatisticsImpl fileStatistics; - protected TreeWriter[] childrenWriters; - protected final RowIndexPositionRecorder rowIndexPosition; - private final OrcProto.RowIndex.Builder rowIndex; - private final OrcProto.RowIndexEntry.Builder rowIndexEntry; - private final PositionedOutputStream rowIndexStream; - private final PositionedOutputStream bloomFilterStream; - protected final BloomFilterIO bloomFilter; - protected final boolean createBloomFilter; - private final OrcProto.BloomFilterIndex.Builder bloomFilterIndex; - private final OrcProto.BloomFilter.Builder bloomFilterEntry; - private boolean foundNulls; - private OutStream isPresentOutStream; - private final List stripeStatsBuilders; - - /** - * Create a tree writer. - * - * @param columnId the column id of the column to write - * @param inspector the object inspector to use - * @param streamFactory limited access to the Writer's data. - * @param nullable can the value be null? - * @throws IOException if an error occurs during creation - */ - TreeWriter(int columnId, ObjectInspector inspector, - StreamFactory streamFactory, - boolean nullable) throws IOException { - this.isCompressed = streamFactory.isCompressed(); - this.id = columnId; - this.inspector = inspector; - if (nullable) { - isPresentOutStream = streamFactory.createStream(id, - OrcProto.Stream.Kind.PRESENT); - isPresent = new BitFieldWriter(isPresentOutStream, 1); - } else { - isPresent = null; - } - this.foundNulls = false; - createBloomFilter = streamFactory.getBloomFilterColumns()[columnId]; - indexStatistics = ColumnStatisticsImpl.create(inspector); - stripeColStatistics = ColumnStatisticsImpl.create(inspector); - fileStatistics = ColumnStatisticsImpl.create(inspector); - childrenWriters = new TreeWriter[0]; - rowIndex = OrcProto.RowIndex.newBuilder(); - rowIndexEntry = OrcProto.RowIndexEntry.newBuilder(); - rowIndexPosition = new RowIndexPositionRecorder(rowIndexEntry); - stripeStatsBuilders = Lists.newArrayList(); - if (streamFactory.buildIndex()) { - rowIndexStream = streamFactory.createStream(id, OrcProto.Stream.Kind.ROW_INDEX); - } else { - rowIndexStream = null; - } - if (createBloomFilter) { - bloomFilterEntry = OrcProto.BloomFilter.newBuilder(); - bloomFilterIndex = OrcProto.BloomFilterIndex.newBuilder(); - bloomFilterStream = streamFactory.createStream(id, OrcProto.Stream.Kind.BLOOM_FILTER); - bloomFilter = new BloomFilterIO(streamFactory.getRowIndexStride(), - streamFactory.getBloomFilterFPP()); - } else { - bloomFilterEntry = null; - bloomFilterIndex = null; - bloomFilterStream = null; - bloomFilter = null; - } - } - - protected OrcProto.RowIndex.Builder getRowIndex() { - return rowIndex; - } - - protected ColumnStatisticsImpl getStripeStatistics() { - return stripeColStatistics; - } - - protected ColumnStatisticsImpl getFileStatistics() { - return fileStatistics; - } - - protected OrcProto.RowIndexEntry.Builder getRowIndexEntry() { - return rowIndexEntry; - } - - IntegerWriter createIntegerWriter(PositionedOutputStream output, - boolean signed, boolean isDirectV2, - StreamFactory writer) { - if (isDirectV2) { - boolean alignedBitpacking = false; - if (writer.getEncodingStrategy().equals(EncodingStrategy.SPEED)) { - alignedBitpacking = true; - } - return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking); - } else { - return new RunLengthIntegerWriter(output, signed); - } - } - - boolean isNewWriteFormat(StreamFactory writer) { - return writer.getVersion() != OrcFile.Version.V_0_11; - } - - /** - * Add a new value to the column. - * - * @param obj The value to write to the column - * @throws IOException if an error occurs during add - */ - void write(Object obj) throws IOException { - if (obj != null) { - indexStatistics.increment(); - } else { - indexStatistics.setNull(); - } - if (isPresent != null) { - isPresent.write(obj == null ? 0 : 1); - if (obj == null) { - foundNulls = true; - } - } - } - - private void removeIsPresentPositions() { - for (int i = 0; i < rowIndex.getEntryCount(); ++i) { - RowIndexEntry.Builder entry = rowIndex.getEntryBuilder(i); - List positions = entry.getPositionsList(); - // bit streams use 3 positions if uncompressed, 4 if compressed - positions = positions.subList(isCompressed ? 4 : 3, positions.size()); - entry.clearPositions(); - entry.addAllPositions(positions); - } - } - - /** - * Write the stripe out to the file. - * - * @param builder the stripe footer that contains the information about the - * layout of the stripe. The TreeWriter is required to update - * the footer with its information. - * @param requiredIndexEntries the number of index entries that are - * required. this is to check to make sure the - * row index is well formed. - * @throws IOException if an error occurs during write - */ - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - if (isPresent != null) { - isPresent.flush(); - - // if no nulls are found in a stream, then suppress the stream - if (!foundNulls) { - isPresentOutStream.suppress(); - // since isPresent bitstream is suppressed, update the index to - // remove the positions of the isPresent stream - if (rowIndexStream != null) { - removeIsPresentPositions(); - } - } - } - - // merge stripe-level column statistics to file statistics and write it to - // stripe statistics - OrcProto.StripeStatistics.Builder stripeStatsBuilder = OrcProto.StripeStatistics.newBuilder(); - writeStripeStatistics(stripeStatsBuilder, this); - stripeStatsBuilders.add(stripeStatsBuilder); - - // reset the flag for next stripe - foundNulls = false; - - builder.addColumns(getEncoding()); - builder.setWriterTimezone(TimeZone.getDefault().getID()); - if (rowIndexStream != null) { - if (rowIndex.getEntryCount() != requiredIndexEntries) { - throw new IllegalArgumentException("Column has wrong number of " + - "index entries found: " + rowIndex.getEntryCount() + " expected: " + - requiredIndexEntries); - } - rowIndex.build().writeTo(rowIndexStream); - rowIndexStream.flush(); - } - rowIndex.clear(); - rowIndexEntry.clear(); - - // write the bloom filter to out stream - if (bloomFilterStream != null) { - bloomFilterIndex.build().writeTo(bloomFilterStream); - bloomFilterStream.flush(); - bloomFilterIndex.clear(); - bloomFilterEntry.clear(); - } - } - - private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder, - TreeWriter treeWriter) { - treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics); - builder.addColStats(treeWriter.stripeColStatistics.serialize().build()); - treeWriter.stripeColStatistics.reset(); - for (TreeWriter child : treeWriter.getChildrenWriters()) { - writeStripeStatistics(builder, child); - } - } - - TreeWriter[] getChildrenWriters() { - return childrenWriters; - } - - /** - * Get the encoding for this column. - * - * @return the information about the encoding of this column - */ - OrcProto.ColumnEncoding getEncoding() { - return OrcProto.ColumnEncoding.newBuilder().setKind( - OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - /** - * Create a row index entry with the previous location and the current - * index statistics. Also merges the index statistics into the file - * statistics before they are cleared. Finally, it records the start of the - * next index and ensures all of the children columns also create an entry. - * - * @throws IOException if an error occurs during create - */ - void createRowIndexEntry() throws IOException { - stripeColStatistics.merge(indexStatistics); - rowIndexEntry.setStatistics(indexStatistics.serialize()); - indexStatistics.reset(); - rowIndex.addEntry(rowIndexEntry); - rowIndexEntry.clear(); - addBloomFilterEntry(); - recordPosition(rowIndexPosition); - for (TreeWriter child : childrenWriters) { - child.createRowIndexEntry(); - } - } - - void addBloomFilterEntry() { - if (createBloomFilter) { - bloomFilterEntry.setNumHashFunctions(bloomFilter.getNumHashFunctions()); - bloomFilterEntry.addAllBitset(Longs.asList(bloomFilter.getBitSet())); - bloomFilterIndex.addBloomFilter(bloomFilterEntry.build()); - bloomFilter.reset(); - bloomFilterEntry.clear(); - } - } - - /** - * Record the current position in each of this column's streams. - * - * @param recorder where should the locations be recorded - * @throws IOException if an error occurs during position recording - */ - void recordPosition(PositionRecorder recorder) throws IOException { - if (isPresent != null) { - isPresent.getPosition(recorder); - } - } - - /** - * Estimate how much memory the writer is consuming excluding the streams. - * - * @return the number of bytes. - */ - long estimateMemory() { - long result = 0; - for (TreeWriter child : childrenWriters) { - result += child.estimateMemory(); - } - return result; - } - - /** - * Handle the top level object write. - * - * This default method is used for all types except structs, which are the - * typical case. VectorizedRowBatch assumes the top level object is a - * struct, so we use the first column for all other types. - * @param batch the batch to write from - * @param offset the row to start on - * @param length the number of rows to write - * @throws IOException if an error occurs during write - */ - void writeRootBatch(VectorizedRowBatch batch, int offset, - int length) throws IOException { - writeBatch(batch.cols[0], offset, length); - } - - /** - * Write the values from the given vector from offset for length elements. - * @param vector the vector to write from - * @param offset the first value from the vector to write - * @param length the number of values from the vector to write - * @throws IOException if an error occurs during write - */ - void writeBatch(ColumnVector vector, int offset, - int length) throws IOException { - if (vector.noNulls) { - if (isPresent != null) { - for (int i = 0; i < length; ++i) { - isPresent.write(1); - indexStatistics.increment(); - } - } - } else { - if (vector.isRepeating) { - boolean isNull = vector.isNull[0]; - if (isPresent != null) { - for (int i = 0; i < length; ++i) { - isPresent.write(isNull ? 0 : 1); - } - } - if (isNull) { - foundNulls = true; - indexStatistics.setNull(); - } else { - indexStatistics.increment(); - } - } else { - // count the number of non-null values - int nonNullCount = 0; - for(int i = 0; i < length; ++i) { - boolean isNull = vector.isNull[i + offset]; - if (!isNull) { - nonNullCount += 1; - } - if (isPresent != null) { - isPresent.write(isNull ? 0 : 1); - } - } - indexStatistics.increment(); - if (nonNullCount != length) { - foundNulls = true; - indexStatistics.setNull(); - } - } - } - } - } - - private static class BooleanTreeWriter extends TreeWriter { - private final BitFieldWriter writer; - - BooleanTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - PositionedOutputStream out = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.writer = new BitFieldWriter(out, 1); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - boolean val = ((BooleanObjectInspector) inspector).get(obj); - indexStatistics.updateBoolean(val); - writer.write(val ? 1 : 0); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - writer.getPosition(recorder); - } - } - - private static class ByteTreeWriter extends TreeWriter { - private final RunLengthByteWriter writer; - - ByteTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.writer = new RunLengthByteWriter(writer.createStream(id, - OrcProto.Stream.Kind.DATA)); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - byte val = ((ByteObjectInspector) inspector).get(obj); - indexStatistics.updateInteger(val); - if (createBloomFilter) { - bloomFilter.addLong(val); - } - writer.write(val); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - writer.getPosition(recorder); - } - } - - private static class IntegerTreeWriter extends TreeWriter { - private final IntegerWriter writer; - private final ShortObjectInspector shortInspector; - private final IntObjectInspector intInspector; - private final LongObjectInspector longInspector; - private boolean isDirectV2 = true; - - IntegerTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - OutStream out = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.isDirectV2 = isNewWriteFormat(writer); - this.writer = createIntegerWriter(out, true, isDirectV2, writer); - if (inspector instanceof IntObjectInspector) { - intInspector = (IntObjectInspector) inspector; - shortInspector = null; - longInspector = null; - } else { - intInspector = null; - if (inspector instanceof LongObjectInspector) { - longInspector = (LongObjectInspector) inspector; - shortInspector = null; - } else { - shortInspector = (ShortObjectInspector) inspector; - longInspector = null; - } - } - recordPosition(rowIndexPosition); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - long val; - if (intInspector != null) { - val = intInspector.get(obj); - } else if (longInspector != null) { - val = longInspector.get(obj); - } else { - val = shortInspector.get(obj); - } - indexStatistics.updateInteger(val); - if (createBloomFilter) { - // integers are converted to longs in column statistics and during SARG evaluation - bloomFilter.addLong(val); - } - writer.write(val); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - writer.getPosition(recorder); - } - } - - private static class FloatTreeWriter extends TreeWriter { - private final PositionedOutputStream stream; - private final SerializationUtils utils; - - FloatTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.stream = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.utils = new SerializationUtils(); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - float val = ((FloatObjectInspector) inspector).get(obj); - indexStatistics.updateDouble(val); - if (createBloomFilter) { - // floats are converted to doubles in column statistics and during SARG evaluation - bloomFilter.addDouble(val); - } - utils.writeFloat(stream, val); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - stream.getPosition(recorder); - } - } - - private static class DoubleTreeWriter extends TreeWriter { - private final PositionedOutputStream stream; - private final SerializationUtils utils; - - DoubleTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.stream = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.utils = new SerializationUtils(); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - double val = ((DoubleObjectInspector) inspector).get(obj); - indexStatistics.updateDouble(val); - if (createBloomFilter) { - bloomFilter.addDouble(val); - } - utils.writeDouble(stream, val); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - stream.getPosition(recorder); - } - } - - private static class StringTreeWriter extends TreeWriter { - private static final int INITIAL_DICTIONARY_SIZE = 4096; - private final OutStream stringOutput; - private final IntegerWriter lengthOutput; - private final IntegerWriter rowOutput; - private final StringRedBlackTree dictionary = - new StringRedBlackTree(INITIAL_DICTIONARY_SIZE); - private final DynamicIntArray rows = new DynamicIntArray(); - private final PositionedOutputStream directStreamOutput; - private final IntegerWriter directLengthOutput; - private final List savedRowIndex = - new ArrayList(); - private final boolean buildIndex; - private final List rowIndexValueCount = new ArrayList(); - // If the number of keys in a dictionary is greater than this fraction of - //the total number of non-null rows, turn off dictionary encoding - private final float dictionaryKeySizeThreshold; - private boolean useDictionaryEncoding = true; - private boolean isDirectV2 = true; - private boolean doneDictionaryCheck; - private final boolean strideDictionaryCheck; - - StringTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.isDirectV2 = isNewWriteFormat(writer); - stringOutput = writer.createStream(id, - OrcProto.Stream.Kind.DICTIONARY_DATA); - lengthOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - rowOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.DATA), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(0L); - buildIndex = writer.buildIndex(); - directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA); - directLengthOutput = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - dictionaryKeySizeThreshold = writer.getConfiguration().getFloat( - HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, - HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.defaultFloatVal); - strideDictionaryCheck = writer.getConfiguration().getBoolean( - HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, - HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.defaultBoolVal); - doneDictionaryCheck = false; - } - - /** - * Method to retrieve text values from the value object, which can be overridden - * by subclasses. - * - * @param obj value - * @return Text text value from obj - */ - Text getTextValue(Object obj) { - return ((StringObjectInspector) inspector).getPrimitiveWritableObject(obj); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - Text val = getTextValue(obj); - if (useDictionaryEncoding || !strideDictionaryCheck) { - rows.add(dictionary.add(val)); - } else { - // write data and length - directStreamOutput.write(val.getBytes(), 0, val.getLength()); - directLengthOutput.write(val.getLength()); - } - indexStatistics.updateString(val); - if (createBloomFilter) { - bloomFilter.addBytes(val.getBytes(), val.getLength()); - } - } - } - - private boolean checkDictionaryEncoding() { - if (!doneDictionaryCheck) { - // Set the flag indicating whether or not to use dictionary encoding - // based on whether or not the fraction of distinct keys over number of - // non-null rows is less than the configured threshold - float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f; - useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold; - doneDictionaryCheck = true; - } - return useDictionaryEncoding; - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - // if rows in stripe is less than dictionaryCheckAfterRows, dictionary - // checking would not have happened. So do it again here. - checkDictionaryEncoding(); - - if (useDictionaryEncoding) { - flushDictionary(); - } else { - // flushout any left over entries from dictionary - if (rows.size() > 0) { - flushDictionary(); - } - - // suppress the stream for every stripe if dictionary is disabled - stringOutput.suppress(); - } - - // we need to build the rowindex before calling super, since it - // writes it out. - super.writeStripe(builder, requiredIndexEntries); - stringOutput.flush(); - lengthOutput.flush(); - rowOutput.flush(); - directStreamOutput.flush(); - directLengthOutput.flush(); - // reset all of the fields to be ready for the next stripe. - dictionary.clear(); - savedRowIndex.clear(); - rowIndexValueCount.clear(); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(0L); - - if (!useDictionaryEncoding) { - // record the start positions of first index stride of next stripe i.e - // beginning of the direct streams when dictionary is disabled - recordDirectStreamPosition(); - } - } - - private void flushDictionary() throws IOException { - final int[] dumpOrder = new int[dictionary.size()]; - - if (useDictionaryEncoding) { - // Write the dictionary by traversing the red-black tree writing out - // the bytes and lengths; and creating the map from the original order - // to the final sorted order. - - dictionary.visit(new StringRedBlackTree.Visitor() { - private int currentId = 0; - - @Override - public void visit(StringRedBlackTree.VisitorContext context - ) throws IOException { - context.writeBytes(stringOutput); - lengthOutput.write(context.getLength()); - dumpOrder[context.getOriginalPosition()] = currentId++; - } - }); - } else { - // for direct encoding, we don't want the dictionary data stream - stringOutput.suppress(); - } - int length = rows.size(); - int rowIndexEntry = 0; - OrcProto.RowIndex.Builder rowIndex = getRowIndex(); - Text text = new Text(); - // write the values translated into the dump order. - for (int i = 0; i <= length; ++i) { - // now that we are writing out the row values, we can finalize the - // row index - if (buildIndex) { - while (i == rowIndexValueCount.get(rowIndexEntry) && rowIndexEntry < savedRowIndex.size()) { - OrcProto.RowIndexEntry.Builder base = - savedRowIndex.get(rowIndexEntry++).toBuilder(); - if (useDictionaryEncoding) { - rowOutput.getPosition(new RowIndexPositionRecorder(base)); - } else { - PositionRecorder posn = new RowIndexPositionRecorder(base); - directStreamOutput.getPosition(posn); - directLengthOutput.getPosition(posn); - } - rowIndex.addEntry(base.build()); - } - } - if (i != length) { - if (useDictionaryEncoding) { - rowOutput.write(dumpOrder[rows.get(i)]); - } else { - dictionary.getText(text, rows.get(i)); - directStreamOutput.write(text.getBytes(), 0, text.getLength()); - directLengthOutput.write(text.getLength()); - } - } - } - rows.clear(); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - // Returns the encoding used for the last call to writeStripe - if (useDictionaryEncoding) { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder().setKind( - OrcProto.ColumnEncoding.Kind.DICTIONARY_V2).setDictionarySize(dictionary.size()).build(); - } - return OrcProto.ColumnEncoding.newBuilder().setKind( - OrcProto.ColumnEncoding.Kind.DICTIONARY).setDictionarySize(dictionary.size()).build(); - } else { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder().setKind( - OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder().setKind( - OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - } - - /** - * This method doesn't call the super method, because unlike most of the - * other TreeWriters, this one can't record the position in the streams - * until the stripe is being flushed. Therefore it saves all of the entries - * and augments them with the final information as the stripe is written. - * - * @throws IOException if an error occurs on create - */ - @Override - void createRowIndexEntry() throws IOException { - getStripeStatistics().merge(indexStatistics); - OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry(); - rowIndexEntry.setStatistics(indexStatistics.serialize()); - indexStatistics.reset(); - OrcProto.RowIndexEntry base = rowIndexEntry.build(); - savedRowIndex.add(base); - rowIndexEntry.clear(); - addBloomFilterEntry(); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(Long.valueOf(rows.size())); - if (strideDictionaryCheck) { - checkDictionaryEncoding(); - } - if (!useDictionaryEncoding) { - if (rows.size() > 0) { - flushDictionary(); - // just record the start positions of next index stride - recordDirectStreamPosition(); - } else { - // record the start positions of next index stride - recordDirectStreamPosition(); - getRowIndex().addEntry(base); - } - } - } - - private void recordDirectStreamPosition() throws IOException { - directStreamOutput.getPosition(rowIndexPosition); - directLengthOutput.getPosition(rowIndexPosition); - } - - @Override - long estimateMemory() { - return rows.getSizeInBytes() + dictionary.getSizeInBytes(); - } - } - - /** - * Under the covers, char is written to ORC the same way as string. - */ - private static class CharTreeWriter extends StringTreeWriter { - - CharTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - } - - /** - * Override base class implementation to support char values. - */ - @Override - Text getTextValue(Object obj) { - return (((HiveCharObjectInspector) inspector) - .getPrimitiveWritableObject(obj)).getTextValue(); - } - } - - /** - * Under the covers, varchar is written to ORC the same way as string. - */ - private static class VarcharTreeWriter extends StringTreeWriter { - - VarcharTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - } - - /** - * Override base class implementation to support varchar values. - */ - @Override - Text getTextValue(Object obj) { - return (((HiveVarcharObjectInspector) inspector) - .getPrimitiveWritableObject(obj)).getTextValue(); - } - } - - private static class BinaryTreeWriter extends TreeWriter { - private final PositionedOutputStream stream; - private final IntegerWriter length; - private boolean isDirectV2 = true; - - BinaryTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.stream = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.isDirectV2 = isNewWriteFormat(writer); - this.length = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - BytesWritable val = - ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj); - stream.write(val.getBytes(), 0, val.getLength()); - length.write(val.getLength()); - indexStatistics.updateBinary(val); - if (createBloomFilter) { - bloomFilter.addBytes(val.getBytes(), val.getLength()); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - stream.flush(); - length.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - stream.getPosition(recorder); - length.getPosition(recorder); - } - } - - static final int MILLIS_PER_SECOND = 1000; - static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00"; - - private static class TimestampTreeWriter extends TreeWriter { - private final IntegerWriter seconds; - private final IntegerWriter nanos; - private final boolean isDirectV2; - private final long base_timestamp; - - TimestampTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.isDirectV2 = isNewWriteFormat(writer); - this.seconds = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.DATA), true, isDirectV2, writer); - this.nanos = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - // for unit tests to set different time zones - this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND; - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - Timestamp val = - ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj); - indexStatistics.updateTimestamp(val); - seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp); - nanos.write(formatNanos(val.getNanos())); - if (createBloomFilter) { - bloomFilter.addLong(val.getTime()); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - seconds.flush(); - nanos.flush(); - recordPosition(rowIndexPosition); - } - - private static long formatNanos(int nanos) { - if (nanos == 0) { - return 0; - } else if (nanos % 100 != 0) { - return ((long) nanos) << 3; - } else { - nanos /= 100; - int trailingZeros = 1; - while (nanos % 10 == 0 && trailingZeros < 7) { - nanos /= 10; - trailingZeros += 1; - } - return ((long) nanos) << 3 | trailingZeros; - } - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - seconds.getPosition(recorder); - nanos.getPosition(recorder); - } - } - - private static class DateTreeWriter extends TreeWriter { - private final IntegerWriter writer; - private final boolean isDirectV2; - - DateTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - OutStream out = writer.createStream(id, - OrcProto.Stream.Kind.DATA); - this.isDirectV2 = isNewWriteFormat(writer); - this.writer = createIntegerWriter(out, true, isDirectV2, writer); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - // Using the Writable here as it's used directly for writing as well as for stats. - DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj); - indexStatistics.updateDate(val); - writer.write(val.getDays()); - if (createBloomFilter) { - bloomFilter.addLong(val.getDays()); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - writer.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - writer.getPosition(recorder); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - } - - private static class DecimalTreeWriter extends TreeWriter { - private final PositionedOutputStream valueStream; - private final IntegerWriter scaleStream; - private final boolean isDirectV2; - - DecimalTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.isDirectV2 = isNewWriteFormat(writer); - valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA); - this.scaleStream = createIntegerWriter(writer.createStream(id, - OrcProto.Stream.Kind.SECONDARY), true, isDirectV2, writer); - recordPosition(rowIndexPosition); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - HiveDecimal decimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(obj); - if (decimal == null) { - return; - } - SerializationUtils.writeBigInteger(valueStream, - decimal.unscaledValue()); - scaleStream.write(decimal.scale()); - indexStatistics.updateDecimal(decimal); - if (createBloomFilter) { - bloomFilter.addString(decimal.toString()); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - valueStream.flush(); - scaleStream.flush(); - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - valueStream.getPosition(recorder); - scaleStream.getPosition(recorder); - } - } - - private static class StructTreeWriter extends TreeWriter { - private final List fields; - - StructTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - StructObjectInspector structObjectInspector = - (StructObjectInspector) inspector; - fields = structObjectInspector.getAllStructFieldRefs(); - childrenWriters = new TreeWriter[fields.size()]; - for (int i = 0; i < childrenWriters.length; ++i) { - childrenWriters[i] = createTreeWriter( - fields.get(i).getFieldObjectInspector(), writer, true); - } - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - StructObjectInspector insp = (StructObjectInspector) inspector; - for (int i = 0; i < fields.size(); ++i) { - StructField field = fields.get(i); - TreeWriter writer = childrenWriters[i]; - writer.write(insp.getStructFieldData(obj, field)); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - for (TreeWriter child : childrenWriters) { - child.writeStripe(builder, requiredIndexEntries); - } - recordPosition(rowIndexPosition); - } - } - - private static class ListTreeWriter extends TreeWriter { - private final IntegerWriter lengths; - private final boolean isDirectV2; - - ListTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.isDirectV2 = isNewWriteFormat(writer); - ListObjectInspector listObjectInspector = (ListObjectInspector) inspector; - childrenWriters = new TreeWriter[1]; - childrenWriters[0] = - createTreeWriter(listObjectInspector.getListElementObjectInspector(), - writer, true); - lengths = createIntegerWriter(writer.createStream(columnId, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - ListObjectInspector insp = (ListObjectInspector) inspector; - int len = insp.getListLength(obj); - lengths.write(len); - if (createBloomFilter) { - bloomFilter.addLong(len); - } - for (int i = 0; i < len; ++i) { - childrenWriters[0].write(insp.getListElement(obj, i)); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - lengths.flush(); - for (TreeWriter child : childrenWriters) { - child.writeStripe(builder, requiredIndexEntries); - } - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - lengths.getPosition(recorder); - } - } - - private static class MapTreeWriter extends TreeWriter { - private final IntegerWriter lengths; - private final boolean isDirectV2; - - MapTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - this.isDirectV2 = isNewWriteFormat(writer); - MapObjectInspector insp = (MapObjectInspector) inspector; - childrenWriters = new TreeWriter[2]; - childrenWriters[0] = - createTreeWriter(insp.getMapKeyObjectInspector(), writer, true); - childrenWriters[1] = - createTreeWriter(insp.getMapValueObjectInspector(), writer, true); - lengths = createIntegerWriter(writer.createStream(columnId, - OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); - recordPosition(rowIndexPosition); - } - - @Override - OrcProto.ColumnEncoding getEncoding() { - if (isDirectV2) { - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); - } - return OrcProto.ColumnEncoding.newBuilder() - .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build(); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - MapObjectInspector insp = (MapObjectInspector) inspector; - // this sucks, but it will have to do until we can get a better - // accessor in the MapObjectInspector. - Map valueMap = insp.getMap(obj); - lengths.write(valueMap.size()); - if (createBloomFilter) { - bloomFilter.addLong(valueMap.size()); - } - for (Map.Entry entry : valueMap.entrySet()) { - childrenWriters[0].write(entry.getKey()); - childrenWriters[1].write(entry.getValue()); - } - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - lengths.flush(); - for (TreeWriter child : childrenWriters) { - child.writeStripe(builder, requiredIndexEntries); - } - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - lengths.getPosition(recorder); - } - } - - private static class UnionTreeWriter extends TreeWriter { - private final RunLengthByteWriter tags; - - UnionTreeWriter(int columnId, - ObjectInspector inspector, - StreamFactory writer, - boolean nullable) throws IOException { - super(columnId, inspector, writer, nullable); - UnionObjectInspector insp = (UnionObjectInspector) inspector; - List choices = insp.getObjectInspectors(); - childrenWriters = new TreeWriter[choices.size()]; - for (int i = 0; i < childrenWriters.length; ++i) { - childrenWriters[i] = createTreeWriter(choices.get(i), writer, true); - } - tags = - new RunLengthByteWriter(writer.createStream(columnId, - OrcProto.Stream.Kind.DATA)); - recordPosition(rowIndexPosition); - } - - @Override - void write(Object obj) throws IOException { - super.write(obj); - if (obj != null) { - UnionObjectInspector insp = (UnionObjectInspector) inspector; - byte tag = insp.getTag(obj); - tags.write(tag); - if (createBloomFilter) { - bloomFilter.addLong(tag); - } - childrenWriters[tag].write(insp.getField(obj)); - } - } - - @Override - void writeStripe(OrcProto.StripeFooter.Builder builder, - int requiredIndexEntries) throws IOException { - super.writeStripe(builder, requiredIndexEntries); - tags.flush(); - for (TreeWriter child : childrenWriters) { - child.writeStripe(builder, requiredIndexEntries); - } - recordPosition(rowIndexPosition); - } - - @Override - void recordPosition(PositionRecorder recorder) throws IOException { - super.recordPosition(recorder); - tags.getPosition(recorder); - } - } - - private static TreeWriter createTreeWriter(ObjectInspector inspector, - StreamFactory streamFactory, - boolean nullable) throws IOException { - switch (inspector.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) { - case BOOLEAN: - return new BooleanTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case BYTE: - return new ByteTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case SHORT: - case INT: - case LONG: - return new IntegerTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case FLOAT: - return new FloatTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case DOUBLE: - return new DoubleTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case STRING: - return new StringTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case CHAR: - return new CharTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case VARCHAR: - return new VarcharTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case BINARY: - return new BinaryTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case TIMESTAMP: - return new TimestampTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case DATE: - return new DateTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - case DECIMAL: - return new DecimalTreeWriter(streamFactory.getNextColumnId(), - inspector, streamFactory, nullable); - default: - throw new IllegalArgumentException("Bad primitive category " + - ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); - } - case STRUCT: - return new StructTreeWriter(streamFactory.getNextColumnId(), inspector, - streamFactory, nullable); - case MAP: - return new MapTreeWriter(streamFactory.getNextColumnId(), inspector, - streamFactory, nullable); - case LIST: - return new ListTreeWriter(streamFactory.getNextColumnId(), inspector, - streamFactory, nullable); - case UNION: - return new UnionTreeWriter(streamFactory.getNextColumnId(), inspector, - streamFactory, nullable); - default: - throw new IllegalArgumentException("Bad category: " + - inspector.getCategory()); - } - } - - private static void writeTypes(OrcProto.Footer.Builder builder, - TreeWriter treeWriter) { - OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); - switch (treeWriter.inspector.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveObjectInspector) treeWriter.inspector).getPrimitiveCategory()) { - case BOOLEAN: - type.setKind(OrcProto.Type.Kind.BOOLEAN); - break; - case BYTE: - type.setKind(OrcProto.Type.Kind.BYTE); - break; - case SHORT: - type.setKind(OrcProto.Type.Kind.SHORT); - break; - case INT: - type.setKind(OrcProto.Type.Kind.INT); - break; - case LONG: - type.setKind(OrcProto.Type.Kind.LONG); - break; - case FLOAT: - type.setKind(OrcProto.Type.Kind.FLOAT); - break; - case DOUBLE: - type.setKind(OrcProto.Type.Kind.DOUBLE); - break; - case STRING: - type.setKind(OrcProto.Type.Kind.STRING); - break; - case CHAR: - // The char length needs to be written to file and should be available - // from the object inspector - CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo(); - type.setKind(Type.Kind.CHAR); - type.setMaximumLength(charTypeInfo.getLength()); - break; - case VARCHAR: - // The varchar length needs to be written to file and should be available - // from the object inspector - VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo(); - type.setKind(Type.Kind.VARCHAR); - type.setMaximumLength(typeInfo.getLength()); - break; - case BINARY: - type.setKind(OrcProto.Type.Kind.BINARY); - break; - case TIMESTAMP: - type.setKind(OrcProto.Type.Kind.TIMESTAMP); - break; - case DATE: - type.setKind(OrcProto.Type.Kind.DATE); - break; - case DECIMAL: - DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo(); - type.setKind(OrcProto.Type.Kind.DECIMAL); - type.setPrecision(decTypeInfo.precision()); - type.setScale(decTypeInfo.scale()); - break; - default: - throw new IllegalArgumentException("Unknown primitive category: " - + ((PrimitiveObjectInspector) treeWriter.inspector).getPrimitiveCategory()); - } - break; - case LIST: - type.setKind(OrcProto.Type.Kind.LIST); - type.addSubtypes(treeWriter.childrenWriters[0].id); - break; - case MAP: - type.setKind(OrcProto.Type.Kind.MAP); - type.addSubtypes(treeWriter.childrenWriters[0].id); - type.addSubtypes(treeWriter.childrenWriters[1].id); - break; - case STRUCT: - type.setKind(OrcProto.Type.Kind.STRUCT); - for (TreeWriter child : treeWriter.childrenWriters) { - type.addSubtypes(child.id); - } - for (StructField field : ((StructTreeWriter) treeWriter).fields) { - type.addFieldNames(field.getFieldName()); - } - break; - case UNION: - type.setKind(OrcProto.Type.Kind.UNION); - for (TreeWriter child : treeWriter.childrenWriters) { - type.addSubtypes(child.id); - } - break; - default: - throw new IllegalArgumentException("Unknown category: " + treeWriter.inspector.getCategory()); - } - builder.addTypes(type); - for (TreeWriter child : treeWriter.childrenWriters) { - writeTypes(builder, child); - } - } - - @VisibleForTesting - public OutputStream getStream() throws IOException { - if (rawWriter == null) { - rawWriter = new ByteCountingOutputStream(flowFileOutputStream); - rawWriter.write(OrcFile.MAGIC.getBytes()); - headerLength = rawWriter.getBytesWritten(); - writer = new OutStream("metadata", bufferSize, codec, - new DirectStream(rawWriter)); - protobufWriter = CodedOutputStream.newInstance(writer); - } - return rawWriter; - } - - private void createRowIndexEntry() throws IOException { - treeWriter.createRowIndexEntry(); - rowsInIndex = 0; - } - - private void flushStripe() throws IOException { - getStream(); - if (buildIndex && rowsInIndex != 0) { - createRowIndexEntry(); - } - if (rowsInStripe != 0) { - if (callback != null) { - callback.preStripeWrite(callbackContext); - } - // finalize the data for the stripe - int requiredIndexEntries = rowIndexStride == 0 ? 0 : - (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride); - OrcProto.StripeFooter.Builder builder = - OrcProto.StripeFooter.newBuilder(); - treeWriter.writeStripe(builder, requiredIndexEntries); - long indexSize = 0; - long dataSize = 0; - for (Map.Entry pair : streams.entrySet()) { - BufferedStream stream = pair.getValue(); - if (!stream.isSuppressed()) { - stream.flush(); - StreamName name = pair.getKey(); - long streamSize = pair.getValue().getOutputSize(); - builder.addStreams(OrcProto.Stream.newBuilder() - .setColumn(name.getColumn()) - .setKind(name.getKind()) - .setLength(streamSize)); - if (StreamName.Area.INDEX == name.getArea()) { - indexSize += streamSize; - } else { - dataSize += streamSize; - } - } - } - OrcProto.StripeFooter footer = builder.build(); - - // Do we need to pad the file so the stripe doesn't straddle a block - // boundary? - long start = rawWriter.getBytesWritten(); - final long currentStripeSize = indexSize + dataSize + footer.getSerializedSize(); - final long available = blockSize - (start % blockSize); - final long overflow = currentStripeSize - adjustedStripeSize; - final float availRatio = (float) available / (float) defaultStripeSize; - - if (availRatio > 0.0f && availRatio < 1.0f - && availRatio > paddingTolerance) { - // adjust default stripe size to fit into remaining space, also adjust - // the next stripe for correction based on the current stripe size - // and user specified padding tolerance. Since stripe size can overflow - // the default stripe size we should apply this correction to avoid - // writing portion of last stripe to next hdfs block. - float correction = overflow > 0 ? (float) overflow - / (float) adjustedStripeSize : 0.0f; - - // correction should not be greater than user specified padding - // tolerance - correction = correction > paddingTolerance ? paddingTolerance - : correction; - - // adjust next stripe size based on current stripe estimate correction - adjustedStripeSize = (long) ((1.0f - correction) * (availRatio * defaultStripeSize)); - } else if (availRatio >= 1.0) { - adjustedStripeSize = defaultStripeSize; - } - - if (availRatio < paddingTolerance && addBlockPadding) { - long padding = blockSize - (start % blockSize); - byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, padding)]; - LOG.info(String.format("Padding ORC by %d bytes (<= %.2f * %d)", - padding, availRatio, defaultStripeSize)); - start += padding; - while (padding > 0) { - int writeLen = (int) Math.min(padding, pad.length); - rawWriter.write(pad, 0, writeLen); - padding -= writeLen; - } - adjustedStripeSize = defaultStripeSize; - } else if (currentStripeSize < blockSize - && (start % blockSize) + currentStripeSize > blockSize) { - // even if you don't pad, reset the default stripe size when crossing a - // block boundary - adjustedStripeSize = defaultStripeSize; - } - - // write out the data streams - for (Map.Entry pair : streams.entrySet()) { - BufferedStream stream = pair.getValue(); - if (!stream.isSuppressed()) { - stream.spillTo(rawWriter); - } - stream.clear(); - } - footer.writeTo(protobufWriter); - protobufWriter.flush(); - writer.flush(); - long footerLength = rawWriter.getBytesWritten() - start - dataSize - indexSize; - OrcProto.StripeInformation dirEntry = - OrcProto.StripeInformation.newBuilder() - .setOffset(start) - .setNumberOfRows(rowsInStripe) - .setIndexLength(indexSize) - .setDataLength(dataSize) - .setFooterLength(footerLength).build(); - stripes.add(dirEntry); - rowCount += rowsInStripe; - rowsInStripe = 0; - } - } - - private long computeRawDataSize() { - long result = 0; - for (TreeWriter child : treeWriter.getChildrenWriters()) { - result += getRawDataSizeFromInspectors(child, child.inspector); - } - return result; - } - - private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) { - long total = 0; - switch (oi.getCategory()) { - case PRIMITIVE: - total += getRawDataSizeFromPrimitives(child, oi); - break; - case LIST: - case MAP: - case UNION: - case STRUCT: - for (TreeWriter tw : child.childrenWriters) { - total += getRawDataSizeFromInspectors(tw, tw.inspector); - } - break; - default: - LOG.debug("Unknown object inspector category."); - break; - } - return total; - } - - private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) { - long result = 0; - long numVals = child.fileStatistics.getNumberOfValues(); - switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case FLOAT: - return numVals * JavaDataModel.get().primitive1(); - case LONG: - case DOUBLE: - return numVals * JavaDataModel.get().primitive2(); - case STRING: - case VARCHAR: - case CHAR: - // ORC strings are converted to java Strings. so use JavaDataModel to - // compute the overall size of strings - child = (StringTreeWriter) child; - StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics; - numVals = numVals == 0 ? 1 : numVals; - int avgStringLen = (int) (scs.getSum() / numVals); - return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen); - case DECIMAL: - return numVals * JavaDataModel.get().lengthOfDecimal(); - case DATE: - return numVals * JavaDataModel.get().lengthOfDate(); - case BINARY: - // get total length of binary blob - BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics; - return bcs.getSum(); - case TIMESTAMP: - return numVals * JavaDataModel.get().lengthOfTimestamp(); - default: - LOG.debug("Unknown primitive category."); - break; - } - - return result; - } - - private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) { - switch (kind) { - case NONE: - return OrcProto.CompressionKind.NONE; - case ZLIB: - return OrcProto.CompressionKind.ZLIB; - case SNAPPY: - return OrcProto.CompressionKind.SNAPPY; - case LZO: - return OrcProto.CompressionKind.LZO; - default: - throw new IllegalArgumentException("Unknown compression " + kind); - } - } - - private void writeFileStatistics(OrcProto.Footer.Builder builder, - TreeWriter writer) throws IOException { - builder.addStatistics(writer.fileStatistics.serialize()); - for (TreeWriter child : writer.getChildrenWriters()) { - writeFileStatistics(builder, child); - } - } - - private int writeMetadata() throws IOException { - getStream(); - OrcProto.Metadata.Builder builder = OrcProto.Metadata.newBuilder(); - for (OrcProto.StripeStatistics.Builder ssb : treeWriter.stripeStatsBuilders) { - builder.addStripeStats(ssb.build()); - } - - long startPosn = rawWriter.getBytesWritten(); - OrcProto.Metadata metadata = builder.build(); - metadata.writeTo(protobufWriter); - protobufWriter.flush(); - writer.flush(); - return (int) (rawWriter.getBytesWritten() - startPosn); - } - - private int writeFooter(long bodyLength) throws IOException { - getStream(); - OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); - builder.setContentLength(bodyLength); - builder.setHeaderLength(headerLength); - builder.setNumberOfRows(rowCount); - builder.setRowIndexStride(rowIndexStride); - // populate raw data size - rawDataSize = computeRawDataSize(); - // serialize the types - writeTypes(builder, treeWriter); - // add the stripe information - for (OrcProto.StripeInformation stripe : stripes) { - builder.addStripes(stripe); - } - // add the column statistics - writeFileStatistics(builder, treeWriter); - // add all of the user metadata - for (Map.Entry entry : userMetadata.entrySet()) { - builder.addMetadata(OrcProto.UserMetadataItem.newBuilder() - .setName(entry.getKey()).setValue(entry.getValue())); - } - long startPosn = rawWriter.getBytesWritten(); - OrcProto.Footer footer = builder.build(); - footer.writeTo(protobufWriter); - protobufWriter.flush(); - writer.flush(); - return (int) (rawWriter.getBytesWritten() - startPosn); - } - - private int writePostScript(int footerLength, int metadataLength) throws IOException { - OrcProto.PostScript.Builder builder = - OrcProto.PostScript.newBuilder() - .setCompression(writeCompressionKind(compress)) - .setFooterLength(footerLength) - .setMetadataLength(metadataLength) - .setMagic(OrcFile.MAGIC) - .addVersion(version.getMajor()) - .addVersion(version.getMinor()) - .setWriterVersion(OrcFile.WriterVersion.HIVE_8732.getId()); - if (compress != CompressionKind.NONE) { - builder.setCompressionBlockSize(bufferSize); - } - OrcProto.PostScript ps = builder.build(); - // need to write this uncompressed - long startPosn = rawWriter.getBytesWritten(); - ps.writeTo(rawWriter); - long length = rawWriter.getBytesWritten() - startPosn; - if (length > 255) { - throw new IllegalArgumentException("PostScript too large at " + length); - } - return (int) length; - } - - private long estimateStripeSize() { - long result = 0; - for (BufferedStream stream : streams.values()) { - result += stream.getBufferSize(); - } - result += treeWriter.estimateMemory(); - return result; - } - - @Override - public synchronized void addUserMetadata(String name, ByteBuffer value) { - userMetadata.put(name, ByteString.copyFrom(value)); - } - - @Override - public void addRow(Object row) throws IOException { - synchronized (this) { - treeWriter.write(row); - rowsInStripe += 1; - if (buildIndex) { - rowsInIndex += 1; - - if (rowsInIndex >= rowIndexStride) { - createRowIndexEntry(); - } - } - } - memoryManager.addedRow(); - } - - public void addRowBatch(VectorizedRowBatch batch) throws IOException { - if (buildIndex) { - // Batch the writes up to the rowIndexStride so that we can get the - // right size indexes. - int posn = 0; - while (posn < batch.size) { - int chunkSize = Math.min(batch.size - posn, - rowIndexStride - rowsInIndex); - treeWriter.writeRootBatch(batch, posn, chunkSize); - posn += chunkSize; - rowsInIndex += chunkSize; - rowsInStripe += chunkSize; - if (rowsInIndex >= rowIndexStride) { - createRowIndexEntry(); - } - } - } else { - rowsInStripe += batch.size; - treeWriter.writeRootBatch(batch, 0, batch.size); - } - memoryManager.addedRow(); - } - - - @Override - public void close() throws IOException { - if (callback != null) { - callback.preFooterWrite(callbackContext); - } - // remove us from the memory manager so that we don't get any callbacks - memoryManager.removeWriter(path); - // actually close the file - flushStripe(); - int metadataLength = writeMetadata(); - int footerLength = writeFooter(rawWriter.getBytesWritten() - metadataLength); - rawWriter.write(writePostScript(footerLength, metadataLength)); - rawWriter.close(); - - } - - /** - * Raw data size will be compute when writing the file footer. Hence raw data - * size value will be available only after closing the writer. - */ - @Override - public long getRawDataSize() { - return rawDataSize; - } - - /** - * Row count gets updated when flushing the stripes. To get accurate row - * count call this method after writer is closed. - */ - @Override - public long getNumberOfRows() { - return rowCount; - } - - @Override - public long writeIntermediateFooter() throws IOException { - // flush any buffered rows - flushStripe(); - // write a footer - if (stripesAtLastFlush != stripes.size()) { - if (callback != null) { - callback.preFooterWrite(callbackContext); - } - int metaLength = writeMetadata(); - int footLength = writeFooter(rawWriter.getBytesWritten() - metaLength); - rawWriter.write(writePostScript(footLength, metaLength)); - stripesAtLastFlush = stripes.size(); - rawWriter.flush(); - } - return rawWriter.getBytesWritten(); - } - - @Override - public void appendStripe(byte[] stripe, int offset, int length, - StripeInformation stripeInfo, - OrcProto.StripeStatistics stripeStatistics) throws IOException { - Objects.requireNonNull(stripe, "Stripe must not be null"); - if (length > stripe.length) { - throw new IllegalArgumentException("Specified length must not be greater specified array length"); - } - Objects.requireNonNull(stripeInfo, "Stripe information must not be null"); - Objects.requireNonNull(stripeStatistics, - "Stripe statistics must not be null"); - - getStream(); - long start = rawWriter.getBytesWritten(); - long stripeLen = length; - long availBlockSpace = blockSize - (start % blockSize); - - // see if stripe can fit in the current hdfs block, else pad the remaining - // space in the block - if (stripeLen < blockSize && stripeLen > availBlockSpace && addBlockPadding) { - byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)]; - LOG.info(String.format("Padding ORC by %d bytes while merging..", - availBlockSpace)); - start += availBlockSpace; - while (availBlockSpace > 0) { - int writeLen = (int) Math.min(availBlockSpace, pad.length); - rawWriter.write(pad, 0, writeLen); - availBlockSpace -= writeLen; - } - } - - rawWriter.write(stripe); - rowsInStripe = stripeStatistics.getColStats(0).getNumberOfValues(); - rowCount += rowsInStripe; - - // since we have already written the stripe, just update stripe statistics - treeWriter.stripeStatsBuilders.add(stripeStatistics.toBuilder()); - - // update file level statistics - updateFileStatistics(stripeStatistics); - - // update stripe information - OrcProto.StripeInformation dirEntry = OrcProto.StripeInformation - .newBuilder() - .setOffset(start) - .setNumberOfRows(rowsInStripe) - .setIndexLength(stripeInfo.getIndexLength()) - .setDataLength(stripeInfo.getDataLength()) - .setFooterLength(stripeInfo.getFooterLength()) - .build(); - stripes.add(dirEntry); - - // reset it after writing the stripe - rowsInStripe = 0; - } - - private void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics) { - List cs = stripeStatistics.getColStatsList(); - List allWriters = getAllColumnTreeWriters(treeWriter); - for (int i = 0; i < allWriters.size(); i++) { - allWriters.get(i).fileStatistics.merge(ColumnStatisticsImpl.deserialize(cs.get(i))); - } - } - - private List getAllColumnTreeWriters(TreeWriter rootTreeWriter) { - List result = Lists.newArrayList(); - getAllColumnTreeWritersImpl(rootTreeWriter, result); - return result; - } - - private void getAllColumnTreeWritersImpl(TreeWriter tw, - List result) { - result.add(tw); - for (TreeWriter child : tw.childrenWriters) { - getAllColumnTreeWritersImpl(child, result); - } - } - - @Override - public void appendUserMetadata(List userMetadata) { - if (userMetadata != null) { - for (UserMetadataItem item : userMetadata) { - this.userMetadata.put(item.getName(), item.getValue()); - } - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/dbcp/hive/HiveConnectionPool.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/dbcp/hive/HiveConnectionPool.java deleted file mode 100644 index d9b2999378..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/dbcp/hive/HiveConnectionPool.java +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.dbcp.hive; - -import org.apache.commons.dbcp2.BasicDataSource; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.jdbc.HiveDriver; -import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnDisabled; -import org.apache.nifi.annotation.lifecycle.OnEnabled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.PropertyValue; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.components.resource.ResourceCardinality; -import org.apache.nifi.components.resource.ResourceType; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.controller.ConfigurationContext; -import org.apache.nifi.controller.ControllerServiceInitializationContext; -import org.apache.nifi.dbcp.DBCPValidator; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.kerberos.KerberosCredentialsService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.security.krb.KerberosKeytabUser; -import org.apache.nifi.security.krb.KerberosLoginException; -import org.apache.nifi.security.krb.KerberosPasswordUser; -import org.apache.nifi.security.krb.KerberosUser; -import org.apache.nifi.util.hive.AuthenticationFailedException; -import org.apache.nifi.util.hive.HiveConfigurator; -import org.apache.nifi.util.hive.ValidationResources; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.sql.Connection; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; - -/** - * Implementation for Database Connection Pooling Service used for Apache Hive - * connections. Apache DBCP is used for connection pooling functionality. - */ -@RequiresInstanceClassLoading -@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"}) -@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive. Connections can be asked from pool and returned after usage.") -@DeprecationNotice(classNames = "org.apache.nifi.dbcp.hive.Hive3ConnectionPool") -public class HiveConnectionPool extends AbstractControllerService implements HiveDBCPService { - private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB"; - - private static final String DEFAULT_MAX_CONN_LIFETIME = "-1"; - - public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder() - .name("hive-db-connect-url") - .displayName("Database Connection URL") - .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters." - + " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included " - + "as a connection parameter when connecting to a secure Hive server.") - .defaultValue(null) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() - .name("hive-config-resources") - .displayName("Hive Configuration Resources") - .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop " - + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication " - + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.") - .required(false) - .identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder() - .name("hive-db-user") - .displayName("Database User") - .description("Database user name") - .defaultValue(null) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder() - .name("hive-db-password") - .displayName("Password") - .description("The password for the database user") - .defaultValue(null) - .required(false) - .sensitive(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder() - .name("hive-max-wait-time") - .displayName("Max Wait Time") - .description("The maximum amount of time that the pool will wait (when there are no available connections) " - + " for a connection to be returned before failing, or -1 to wait indefinitely. ") - .defaultValue("500 millis") - .required(true) - .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder() - .name("hive-max-total-connections") - .displayName("Max Total Connections") - .description("The maximum number of active connections that can be allocated from this pool at the same time, " - + "or negative for no limit.") - .defaultValue("8") - .required(true) - .addValidator(StandardValidators.INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_CONN_LIFETIME = new PropertyDescriptor.Builder() - .displayName("Max Connection Lifetime") - .name("hive-max-conn-lifetime") - .description("The maximum lifetime in milliseconds of a connection. After this time is exceeded the " + - "connection pool will invalidate the connection. A value of zero or -1 " + - "means the connection has an infinite lifetime.") - .defaultValue(DEFAULT_MAX_CONN_LIFETIME) - .required(true) - .addValidator(DBCPValidator.CUSTOM_TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder() - .name("Validation-query") - .displayName("Validation query") - .description("Validation query used to validate connections before returning them. " - + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. " - + "NOTE: Using validation may have a performance penalty.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder() - .name("kerberos-credentials-service") - .displayName("Kerberos Credentials Service") - .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos") - .identifiesControllerService(KerberosCredentialsService.class) - .required(false) - .build(); - - - private List properties; - - private String connectionUrl = "unknown"; - - // Holder of cached Configuration information so validation does not reload the same config over and over - private final AtomicReference validationResourceHolder = new AtomicReference<>(); - - private volatile BasicDataSource dataSource; - - private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator(); - private volatile UserGroupInformation ugi; - private final AtomicReference kerberosUserReference = new AtomicReference<>(); - private volatile File kerberosConfigFile = null; - private volatile KerberosProperties kerberosProperties; - - @Override - protected void init(final ControllerServiceInitializationContext context) { - List props = new ArrayList<>(); - props.add(DATABASE_URL); - props.add(HIVE_CONFIGURATION_RESOURCES); - props.add(DB_USER); - props.add(DB_PASSWORD); - props.add(MAX_WAIT_TIME); - props.add(MAX_TOTAL_CONNECTIONS); - props.add(MAX_CONN_LIFETIME); - props.add(VALIDATION_QUERY); - props.add(KERBEROS_CREDENTIALS_SERVICE); - - kerberosConfigFile = context.getKerberosConfigurationFile(); - kerberosProperties = new KerberosProperties(kerberosConfigFile); - props.add(kerberosProperties.getKerberosPrincipal()); - props.add(kerberosProperties.getKerberosKeytab()); - props.add(kerberosProperties.getKerberosPassword()); - properties = props; - } - - @Override - protected List getSupportedPropertyDescriptors() { - return properties; - } - - @Override - protected Collection customValidate(ValidationContext validationContext) { - boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet(); - - final List problems = new ArrayList<>(); - - if (confFileProvided) { - final String explicitPrincipal = validationContext.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue(); - final String explicitKeytab = validationContext.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue(); - final String explicitPassword = validationContext.getProperty(kerberosProperties.getKerberosPassword()).getValue(); - final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService != null) { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } else { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = explicitKeytab; - } - - - final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, explicitPassword, validationResourceHolder, getLogger())); - - if (credentialsService != null && (explicitPrincipal != null || explicitKeytab != null || explicitPassword != null)) { - problems.add(new ValidationResult.Builder() - .subject("Kerberos Credentials") - .valid(false) - .explanation("Cannot specify a Kerberos Credentials Service while also specifying a Kerberos Principal, Kerberos Keytab, or Kerberos Password") - .build()); - } - - if (!isAllowExplicitKeytab() && explicitKeytab != null) { - problems.add(new ValidationResult.Builder() - .subject("Kerberos Credentials") - .valid(false) - .explanation("The '" + ALLOW_EXPLICIT_KEYTAB + "' system environment variable is configured to forbid explicitly configuring Kerberos Keytab in processors. " - + "The Kerberos Credentials Service should be used instead of setting the Kerberos Keytab or Kerberos Principal property.") - .build()); - } - } - - return problems; - } - - /** - * Configures connection pool by creating an instance of the - * {@link BasicDataSource} based on configuration provided with - * {@link ConfigurationContext}. - *

- * This operation makes no guarantees that the actual connection could be - * made since the underlying system may still go off-line during normal - * operation of the connection pool. - *

- * As of Apache NiFi 1.5.0, due to changes made to - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking - * {@link HiveConfigurator#authenticate(Configuration, String, String)} - * to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to - * relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from - * {@link HiveConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent - * threads from requesting concurrent relogins. For more information, please read the documentation for - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. - *

- * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by - * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive - * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions - * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same - * {@link UserGroupInformation} instance. One of these threads could leave the - * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state - * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed - * authentication attempts that would leave the Hive controller service in an unrecoverable state. - * - * @see SecurityUtil#loginKerberos(Configuration, String, String) - * @see HiveConfigurator#authenticate(Configuration, String, String) - * @see HiveConfigurator#authenticate(Configuration, String, String, long) - * @param context the configuration context - * @throws InitializationException if unable to create a database connection - */ - @OnEnabled - public void onConfigured(final ConfigurationContext context) throws InitializationException { - - ComponentLog log = getLogger(); - - final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles); - final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue(); - - // add any dynamic properties to the Hive configuration - for (final Map.Entry entry : context.getProperties().entrySet()) { - final PropertyDescriptor descriptor = entry.getKey(); - if (descriptor.isDynamic()) { - hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue()); - } - } - - final String drv = HiveDriver.class.getName(); - if (SecurityUtil.isSecurityEnabled(hiveConfig)) { - final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue(); - final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue(); - final String explicitPassword = context.getProperty(kerberosProperties.getKerberosPassword()).getValue(); - final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService != null) { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } else { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = explicitKeytab; - } - - if (resolvedKeytab != null) { - kerberosUserReference.set(new KerberosKeytabUser(resolvedPrincipal, resolvedKeytab)); - log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab}); - } else if (explicitPassword != null) { - kerberosUserReference.set(new KerberosPasswordUser(resolvedPrincipal, explicitPassword)); - log.info("Hive Security Enabled, logging in as principal {} with password", new Object[] {resolvedPrincipal}); - } else { - throw new InitializationException("Unable to authenticate with Kerberos, no keytab or password was provided"); - } - - try { - ugi = hiveConfigurator.authenticate(hiveConfig, kerberosUserReference.get()); - } catch (AuthenticationFailedException ae) { - log.error(ae.getMessage(), ae); - throw new InitializationException(ae); - } - - getLogger().info("Successfully logged in as principal " + resolvedPrincipal); - } - - final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue(); - final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue(); - final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS); - final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger(); - final long maxConnectionLifetimeMillis = extractMillisWithInfinite(context.getProperty(MAX_CONN_LIFETIME).evaluateAttributeExpressions()); - - dataSource = new BasicDataSource(); - dataSource.setDriverClassName(drv); - - connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue(); - - dataSource.setMaxWaitMillis(maxWaitMillis); - dataSource.setMaxTotal(maxTotal); - dataSource.setMaxConnLifetimeMillis(maxConnectionLifetimeMillis); - - if (validationQuery != null && !validationQuery.isEmpty()) { - dataSource.setValidationQuery(validationQuery); - dataSource.setTestOnBorrow(true); - } - - dataSource.setUrl(connectionUrl); - dataSource.setUsername(user); - dataSource.setPassword(passw); - } - - /** - * Shutdown pool, close all open connections. - */ - @OnDisabled - public void shutdown() { - try { - dataSource.close(); - } catch (final SQLException e) { - throw new ProcessException(e); - } - } - - @Override - public Connection getConnection() throws ProcessException { - try { - if (ugi != null) { - /* - * Explicitly check the TGT and relogin if necessary with the KerberosUser instance. No synchronization - * is necessary in the client code, since AbstractKerberosUser's checkTGTAndRelogin method is synchronized. - */ - getLogger().trace("getting UGI instance"); - if (kerberosUserReference.get() != null) { - // if there's a KerberosUser associated with this UGI, check the TGT and relogin if it is close to expiring - KerberosUser kerberosUser = kerberosUserReference.get(); - getLogger().debug("kerberosUser is " + kerberosUser); - try { - getLogger().debug("checking TGT on kerberosUser [{}]", new Object[]{kerberosUser}); - kerberosUser.checkTGTAndRelogin(); - } catch (final KerberosLoginException e) { - throw new ProcessException("Unable to relogin with kerberos credentials for " + kerberosUser.getPrincipal(), e); - } - } else { - getLogger().debug("kerberosUser was null, will not refresh TGT with KerberosUser"); - // no synchronization is needed for UserGroupInformation.checkTGTAndReloginFromKeytab; UGI handles the synchronization internally - ugi.checkTGTAndReloginFromKeytab(); - } - try { - return ugi.doAs((PrivilegedExceptionAction) () -> dataSource.getConnection()); - } catch (UndeclaredThrowableException e) { - Throwable cause = e.getCause(); - if (cause instanceof SQLException) { - throw (SQLException) cause; - } else { - throw e; - } - } - } else { - getLogger().info("Simple Authentication"); - return dataSource.getConnection(); - } - } catch (SQLException | IOException | InterruptedException e) { - getLogger().error("Error getting Hive connection", e); - throw new ProcessException(e); - } - } - - @Override - public String toString() { - return "HiveConnectionPool[id=" + getIdentifier() + "]"; - } - - @Override - public String getConnectionURL() { - return connectionUrl; - } - - /* - * Overridable by subclasses in the same package, mainly intended for testing purposes to allow verification without having to set environment variables. - */ - boolean isAllowExplicitKeytab() { - return Boolean.parseBoolean(System.getenv(ALLOW_EXPLICIT_KEYTAB)); - } - - private long extractMillisWithInfinite(PropertyValue prop) { - if (prop.getValue() == null || DEFAULT_MAX_CONN_LIFETIME.equals(prop.getValue())) { - return -1; - } else { - return prop.asTimePeriod(TimeUnit.MILLISECONDS); - } - } - -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java deleted file mode 100644 index f1ef5fd7e7..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHiveQLProcessor.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.antlr.runtime.tree.CommonTree; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.apache.hadoop.hive.ql.parse.ParseException; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.AbstractSessionFactoryProcessor; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.io.InputStreamCallback; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.stream.io.StreamUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLDataException; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * An abstract base class for HiveQL processors to share common data, methods, etc. - */ -public abstract class AbstractHiveQLProcessor extends AbstractSessionFactoryProcessor { - - protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type"); - protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+"); - static String ATTR_INPUT_TABLES = "query.input.tables"; - static String ATTR_OUTPUT_TABLES = "query.output.tables"; - - - public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder() - .name("Hive Database Connection Pooling Service") - .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database") - .required(true) - .identifiesControllerService(HiveDBCPService.class) - .build(); - - public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder() - .name("hive-charset") - .displayName("Character Set") - .description("Specifies the character set of the record data.") - .required(true) - .defaultValue("UTF-8") - .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) - .build(); - - /** - * Determines the HiveQL statement that should be executed for the given FlowFile - * - * @param session the session that can be used to access the given FlowFile - * @param flowFile the FlowFile whose HiveQL statement should be executed - * @return the HiveQL that is associated with the given FlowFile - */ - protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) { - // Read the HiveQL from the FlowFile's content - final byte[] buffer = new byte[(int) flowFile.getSize()]; - session.read(flowFile, new InputStreamCallback() { - @Override - public void process(final InputStream in) throws IOException { - StreamUtils.fillBuffer(in, buffer); - } - }); - - // Create the PreparedStatement to use for this FlowFile. - return new String(buffer, charset); - } - - private class ParameterHolder { - String attributeName; - int jdbcType; - String value; - } - - /** - * Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes. - * - * @param stmt the statement to set the parameters on - * @param attributes the attributes from which to derive parameter indices, values, and types - * @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called - */ - protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map attributes) throws SQLException { - - Map parmMap = new TreeMap(); - - for (final Map.Entry entry : attributes.entrySet()) { - final String key = entry.getKey(); - final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key); - if (matcher.matches()) { - final int parameterIndex = Integer.parseInt(matcher.group(1)); - if (parameterIndex >= base && parameterIndex < base + paramCount) { - final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches(); - if (!isNumeric) { - throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType"); - } - - final String valueAttrName = "hiveql.args." + parameterIndex + ".value"; - - ParameterHolder ph = new ParameterHolder(); - int realIndexLoc = parameterIndex - base +1; - - ph.jdbcType = Integer.parseInt(entry.getValue()); - ph.value = attributes.get(valueAttrName); - ph.attributeName = valueAttrName; - - parmMap.put(realIndexLoc, ph); - - } - } - } - - - // Now that's we've retrieved the correct number of parameters and it's sorted, let's set them. - for (final Map.Entry entry : parmMap.entrySet()) { - final Integer index = entry.getKey(); - final ParameterHolder ph = entry.getValue(); - - try { - setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType); - } catch (final NumberFormatException nfe) { - throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe); - } - } - return base + paramCount; - } - - /** - * Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the - * provided PreparedStatement - * - * @param stmt the PreparedStatement to set the parameter on - * @param attrName the name of the attribute that the parameter is coming from - for logging purposes - * @param parameterIndex the index of the HiveQL parameter to set - * @param parameterValue the value of the HiveQL parameter to set - * @param jdbcType the JDBC Type of the HiveQL parameter to set - * @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter - */ - protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException { - if (parameterValue == null) { - stmt.setNull(parameterIndex, jdbcType); - } else { - try { - switch (jdbcType) { - case Types.BIT: - case Types.BOOLEAN: - stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue)); - break; - case Types.TINYINT: - stmt.setByte(parameterIndex, Byte.parseByte(parameterValue)); - break; - case Types.SMALLINT: - stmt.setShort(parameterIndex, Short.parseShort(parameterValue)); - break; - case Types.INTEGER: - stmt.setInt(parameterIndex, Integer.parseInt(parameterValue)); - break; - case Types.BIGINT: - stmt.setLong(parameterIndex, Long.parseLong(parameterValue)); - break; - case Types.REAL: - stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue)); - break; - case Types.FLOAT: - case Types.DOUBLE: - stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue)); - break; - case Types.DECIMAL: - case Types.NUMERIC: - stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue)); - break; - case Types.DATE: - stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue))); - break; - case Types.TIME: - stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue))); - break; - case Types.TIMESTAMP: - stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue))); - break; - case Types.CHAR: - case Types.VARCHAR: - case Types.LONGNVARCHAR: - case Types.LONGVARCHAR: - stmt.setString(parameterIndex, parameterValue); - break; - default: - stmt.setObject(parameterIndex, parameterValue, jdbcType); - break; - } - } catch (SQLException e) { - // Log which attribute/parameter had an error, then rethrow to be handled at the top level - getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e); - throw e; - } - } - } - - protected static class TableName { - private final String database; - private final String table; - private final boolean input; - - TableName(String database, String table, boolean input) { - this.database = database; - this.table = table; - this.input = input; - } - - public String getDatabase() { - return database; - } - - public String getTable() { - return table; - } - - public boolean isInput() { - return input; - } - - @Override - public String toString() { - return database == null || database.isEmpty() ? table : database + '.' + table; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - TableName tableName = (TableName) o; - - if (input != tableName.input) return false; - if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false; - return table.equals(tableName.table); - } - - @Override - public int hashCode() { - int result = database != null ? database.hashCode() : 0; - result = 31 * result + table.hashCode(); - result = 31 * result + (input ? 1 : 0); - return result; - } - } - - protected Set findTableNames(final String query) { - final ASTNode node; - try { - node = new ParseDriver().parse(normalize(query)); - } catch (ParseException e) { - // If failed to parse the query, just log a message, but continue. - getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e); - return Collections.emptySet(); - } - - final HashSet tableNames = new HashSet<>(); - findTableNames(node, tableNames); - return tableNames; - } - - /** - * Normalize query. - * Hive resolves prepared statement parameters before executing a query, - * see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail. - * HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one. - * In this normalize method, '?' is replaced to 'x' to avoid that. - */ - private String normalize(String query) { - return query.replace('?', 'x'); - } - - private void findTableNames(final Object obj, final Set tableNames) { - if (!(obj instanceof CommonTree)) { - return; - } - final CommonTree tree = (CommonTree) obj; - final int childCount = tree.getChildCount(); - if ("TOK_TABNAME".equals(tree.getText())) { - final TableName tableName; - final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText()); - switch (childCount) { - case 1 : - tableName = new TableName(null, tree.getChild(0).getText(), isInput); - break; - case 2: - tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput); - break; - default: - throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount); - } - // If parent is TOK_TABREF, then it is an input table. - tableNames.add(tableName); - return; - } - for (int i = 0; i < childCount; i++) { - findTableNames(tree.getChild(i), tableNames); - } - } - - protected Map toQueryTableAttributes(Set tableNames) { - final Map attributes = new HashMap<>(); - for (TableName tableName : tableNames) { - final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES; - if (attributes.containsKey(attributeName)) { - attributes.put(attributeName, attributes.get(attributeName) + "," + tableName); - } else { - attributes.put(attributeName, tableName.toString()); - } - } - return attributes; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/ConvertAvroToORC.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/ConvertAvroToORC.java deleted file mode 100644 index d918365a37..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/ConvertAvroToORC.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.orc.CompressionKind; -import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils; -import org.apache.hadoop.hive.ql.io.orc.OrcFlowFileWriter; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.SideEffectFree; -import org.apache.nifi.annotation.behavior.SupportsBatching; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.resource.ResourceCardinality; -import org.apache.nifi.components.resource.ResourceType; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.processor.AbstractProcessor; -import org.apache.nifi.processor.DataUnit; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.util.hive.HiveJdbcCommon; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -/** - * The ConvertAvroToORC processor takes an Avro-formatted flow file as input and converts it into ORC format. - */ -@SideEffectFree -@SupportsBatching -@Tags({"avro", "orc", "hive", "convert"}) -@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) -@CapabilityDescription("Converts an Avro record into ORC file format. This processor provides a direct mapping of an Avro record to an ORC record, such " - + "that the resulting ORC file will have the same hierarchical structure as the Avro document. If an incoming FlowFile contains a stream of " - + "multiple Avro records, the resultant FlowFile will contain a ORC file containing all of the Avro records. If an incoming FlowFile does " - + "not contain any records, an empty ORC file is the output. NOTE: Many Avro datatypes (collections, primitives, and unions of primitives, e.g.) can " - + "be converted to ORC, but unions of collections and other complex datatypes may not be able to be converted to ORC.") -@WritesAttributes({ - @WritesAttribute(attribute = "mime.type", description = "Sets the mime type to application/octet-stream"), - @WritesAttribute(attribute = "filename", description = "Sets the filename to the existing filename with the extension replaced by / added to by .orc"), - @WritesAttribute(attribute = "record.count", description = "Sets the number of records in the ORC file."), - @WritesAttribute(attribute = "hive.ddl", description = "Creates a partial Hive DDL statement for creating a table in Hive from this ORC file. " - + "This can be used in ReplaceText for setting the content to the DDL. To make it valid DDL, add \"LOCATION ''\", where " - + "the path is the directory that contains this ORC file on HDFS. For example, ConvertAvroToORC can send flow files to a PutHDFS processor to send the file to " - + "HDFS, then to a ReplaceText to set the content to this DDL (plus the LOCATION clause as described), then to PutHiveQL processor to create the table " - + "if it doesn't exist.") -}) -public class ConvertAvroToORC extends AbstractProcessor { - - // Attributes - public static final String ORC_MIME_TYPE = "application/octet-stream"; - public static final String HIVE_DDL_ATTRIBUTE = "hive.ddl"; - public static final String RECORD_COUNT_ATTRIBUTE = "record.count"; - - - // Properties - public static final PropertyDescriptor ORC_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() - .name("orc-config-resources") - .displayName("ORC Configuration Resources") - .description("A file or comma separated list of files which contains the ORC configuration (hive-site.xml, e.g.). Without this, Hadoop " - + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Please see the ORC documentation for more details.") - .required(false) - .identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE) - .build(); - - public static final PropertyDescriptor STRIPE_SIZE = new PropertyDescriptor.Builder() - .name("orc-stripe-size") - .displayName("Stripe Size") - .description("The size of the memory buffer (in bytes) for writing stripes to an ORC file") - .required(true) - .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) - .defaultValue("64 MB") - .build(); - - public static final PropertyDescriptor BUFFER_SIZE = new PropertyDescriptor.Builder() - .name("orc-buffer-size") - .displayName("Buffer Size") - .description("The maximum size of the memory buffers (in bytes) used for compressing and storing a stripe in memory. This is a hint to the ORC writer, " - + "which may choose to use a smaller buffer size based on stripe size and number of columns for efficient stripe writing and memory utilization.") - .required(true) - .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) - .defaultValue("10 KB") - .build(); - - public static final PropertyDescriptor COMPRESSION_TYPE = new PropertyDescriptor.Builder() - .name("orc-compression-type") - .displayName("Compression Type") - .required(true) - .allowableValues("NONE", "ZLIB", "SNAPPY", "LZO") - .defaultValue("NONE") - .build(); - - public static final PropertyDescriptor HIVE_TABLE_NAME = new PropertyDescriptor.Builder() - .name("orc-hive-table-name") - .displayName("Hive Table Name") - .description("An optional table name to insert into the hive.ddl attribute. The generated DDL can be used by " - + "a PutHiveQL processor (presumably after a PutHDFS processor) to create a table backed by the converted ORC file. " - + "If this property is not provided, the full name (including namespace) of the incoming Avro record will be normalized " - + "and used as the table name.") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_BLANK_VALIDATOR) - .build(); - - // Relationships - static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile is routed to this relationship after it has been converted to ORC format.") - .build(); - static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile is routed to this relationship if it cannot be parsed as Avro or cannot be converted to ORC for any reason") - .build(); - - private final static List propertyDescriptors; - private final static Set relationships; - - private volatile Configuration orcConfig; - - /* - * Will ensure that the list of property descriptors is built only once. - * Will also create a Set of relationships - */ - static { - List _propertyDescriptors = new ArrayList<>(); - _propertyDescriptors.add(ORC_CONFIGURATION_RESOURCES); - _propertyDescriptors.add(STRIPE_SIZE); - _propertyDescriptors.add(BUFFER_SIZE); - _propertyDescriptors.add(COMPRESSION_TYPE); - _propertyDescriptors.add(HIVE_TABLE_NAME); - propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - @OnScheduled - public void setup(ProcessContext context) { - boolean confFileProvided = context.getProperty(ORC_CONFIGURATION_RESOURCES).isSet(); - if (confFileProvided) { - final String configFiles = context.getProperty(ORC_CONFIGURATION_RESOURCES).getValue(); - orcConfig = HiveJdbcCommon.getConfigurationFromFiles(configFiles); - } - } - - @Override - public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { - FlowFile flowFile = session.get(); - if (flowFile == null) { - return; - } - - try { - long startTime = System.currentTimeMillis(); - final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue(); - final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue(); - final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue()); - final AtomicReference hiveAvroSchema = new AtomicReference<>(null); - final AtomicInteger totalRecordCount = new AtomicInteger(0); - final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key()); - flowFile = session.write(flowFile, (rawIn, rawOut) -> { - try (final InputStream in = new BufferedInputStream(rawIn); - final OutputStream out = new BufferedOutputStream(rawOut); - final DataFileStream reader = new DataFileStream<>(in, new GenericDatumReader<>())) { - - // Create ORC schema from Avro schema - Schema avroSchema = reader.getSchema(); - - TypeInfo orcSchema = NiFiOrcUtils.getOrcField(avroSchema); - - if (orcConfig == null) { - orcConfig = new Configuration(); - } - - OrcFlowFileWriter orcWriter = NiFiOrcUtils.createWriter( - out, - new Path(fileName), - orcConfig, - orcSchema, - stripeSize, - compressionType, - bufferSize); - try { - - int recordCount = 0; - while (reader.hasNext()) { - GenericRecord currRecord = reader.next(); - List fields = currRecord.getSchema().getFields(); - if (fields != null) { - Object[] row = new Object[fields.size()]; - for (int i = 0; i < fields.size(); i++) { - Schema.Field field = fields.get(i); - Schema fieldSchema = field.schema(); - Object o = currRecord.get(field.name()); - try { - row[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), o); - } catch (ArrayIndexOutOfBoundsException aioobe) { - getLogger().error("Index out of bounds at record {} for column {}, type {}, and object {}", - new Object[]{recordCount, i, fieldSchema.getType().getName(), o.toString()}, - aioobe); - throw new IOException(aioobe); - } - } - orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, row)); - recordCount++; - } - } - hiveAvroSchema.set(avroSchema); - totalRecordCount.set(recordCount); - } finally { - // finished writing this record, close the writer (which will flush to the flow file) - orcWriter.close(); - } - } - }); - - final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet() - ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue() - : NiFiOrcUtils.normalizeHiveTableName(hiveAvroSchema.get().getFullName()); - String hiveDDL = NiFiOrcUtils.generateHiveDDL(hiveAvroSchema.get(), hiveTableName); - // Add attributes and transfer to success - flowFile = session.putAttribute(flowFile, RECORD_COUNT_ATTRIBUTE, Integer.toString(totalRecordCount.get())); - flowFile = session.putAttribute(flowFile, HIVE_DDL_ATTRIBUTE, hiveDDL); - StringBuilder newFilename = new StringBuilder(); - int extensionIndex = fileName.lastIndexOf("."); - if (extensionIndex != -1) { - newFilename.append(fileName.substring(0, extensionIndex)); - } else { - newFilename.append(fileName); - } - newFilename.append(".orc"); - flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), ORC_MIME_TYPE); - flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename.toString()); - session.transfer(flowFile, REL_SUCCESS); - session.getProvenanceReporter().modifyContent(flowFile, "Converted " + totalRecordCount.get() + " records", System.currentTimeMillis() - startTime); - - } catch (ProcessException | IllegalArgumentException e) { - getLogger().error("Failed to convert {} from Avro to ORC due to {}; transferring to failure", new Object[]{flowFile, e}); - session.transfer(flowFile, REL_FAILURE); - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveQL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveQL.java deleted file mode 100644 index ffa0a3edc2..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveQL.java +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.lang3.StringUtils; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; -import org.apache.nifi.annotation.behavior.ReadsAttribute; -import org.apache.nifi.annotation.behavior.ReadsAttributes; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.SeeAlso; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.ErrorTypes; -import org.apache.nifi.processor.util.pattern.ExceptionHandler; -import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError; -import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles; -import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection; -import org.apache.nifi.processor.util.pattern.Put; -import org.apache.nifi.processor.util.pattern.RollbackOnFailure; -import org.apache.nifi.processor.util.pattern.RoutingResult; - -import java.nio.charset.Charset; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLNonTransientException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.regex.Pattern; - -@SeeAlso(SelectHiveQL.class) -@InputRequirement(Requirement.INPUT_REQUIRED) -@Tags({"sql", "hive", "put", "database", "update", "insert"}) -@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command " - + "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes " - + "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be " - + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.") -@ReadsAttributes({ - @ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer " - + "that represents the JDBC Type of the parameter."), - @ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as " - + "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.") -}) -@WritesAttributes({ - @WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " - + "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."), - @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " - + "and contains the target table names in 'databaseName.tableName' format.") -}) -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.PutHive3QL") -public class PutHiveQL extends AbstractHiveQLProcessor { - - public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder() - .name("hive-batch-size") - .displayName("Batch Size") - .description("The preferred number of FlowFiles to put to the database in a single transaction") - .required(true) - .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) - .defaultValue("100") - .build(); - - public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder() - .name("statement-delimiter") - .displayName("Statement Delimiter") - .description("Statement Delimiter used to separate SQL statements in a multiple statement script") - .required(true) - .defaultValue(";") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .build(); - - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile is routed to this relationship after the database is successfully updated") - .build(); - public static final Relationship REL_RETRY = new Relationship.Builder() - .name("retry") - .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed") - .build(); - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, " - + "such as an invalid query or an integrity constraint violation") - .build(); - - - private final static List propertyDescriptors; - private final static Set relationships; - - /* - * Will ensure that the list of property descriptors is built only once. - * Will also create a Set of relationships - */ - static { - List _propertyDescriptors = new ArrayList<>(); - _propertyDescriptors.add(HIVE_DBCP_SERVICE); - _propertyDescriptors.add(BATCH_SIZE); - _propertyDescriptors.add(CHARSET); - _propertyDescriptors.add(STATEMENT_DELIMITER); - _propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE); - propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - _relationships.add(REL_RETRY); - relationships = Collections.unmodifiableSet(_relationships); - } - - private Put process; - private ExceptionHandler exceptionHandler; - - @OnScheduled - public void constructProcess() { - exceptionHandler = new ExceptionHandler<>(); - exceptionHandler.mapException(e -> { - if (e instanceof SQLNonTransientException) { - return ErrorTypes.InvalidInput; - } else if (e instanceof SQLException) { - // Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes - int errorCode = ((SQLException) e).getErrorCode(); - getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode}); - if (errorCode >= 10000 && errorCode < 20000) { - return ErrorTypes.InvalidInput; - } else if (errorCode >= 20000 && errorCode < 30000) { - return ErrorTypes.InvalidInput; - } else if (errorCode >= 30000 && errorCode < 40000) { - return ErrorTypes.TemporalInputFailure; - } else if (errorCode >= 40000 && errorCode < 50000) { - // These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes - // a ProcessException, we'll route to failure via an InvalidInput error type. - return ErrorTypes.InvalidInput; - } else { - // Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure - // or rolled back depending on the user's setting of Rollback On Failure. - return ErrorTypes.TemporalFailure; - } - } else { - return ErrorTypes.UnknownFailure; - } - }); - exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger())); - - process = new Put<>(); - process.setLogger(getLogger()); - process.initConnection(initConnection); - process.fetchFlowFiles(fetchFlowFiles); - process.putFlowFile(putFlowFile); - process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY)); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - private class FunctionContext extends RollbackOnFailure { - final Charset charset; - final String statementDelimiter; - final long startNanos = System.nanoTime(); - - String connectionUrl; - - - private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) { - super(rollbackOnFailure, false); - this.charset = charset; - this.statementDelimiter = statementDelimiter; - } - } - - private InitConnection initConnection = (context, session, fc, ffs) -> { - final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class); - final Connection connection = dbcpService.getConnection(ffs == null || ffs.isEmpty() ? Collections.emptyMap() : ffs.get(0).getAttributes()); - fc.connectionUrl = dbcpService.getConnectionURL(); - return connection; - }; - - private FetchFlowFiles fetchFlowFiles = (context, session, functionContext, result) -> { - final int batchSize = context.getProperty(BATCH_SIZE).asInteger(); - return session.get(batchSize); - }; - - private Put.PutFlowFile putFlowFile = (context, session, fc, conn, flowFile, result) -> { - final String script = getHiveQL(session, flowFile, fc.charset); - String regex = "(? tableNames = new HashSet<>(); - exceptionHandler.execute(fc, flowFile, input -> { - int loc = 1; - for (String hiveQLStr: hiveQLs) { - getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr}); - - final String hiveQL = hiveQLStr.trim(); - if (!StringUtils.isEmpty(hiveQL)) { - try (final PreparedStatement stmt = conn.prepareStatement(hiveQL)) { - - // Get ParameterMetadata - // Hive JDBC Doesn't support this yet: - // ParameterMetaData pmd = stmt.getParameterMetaData(); - // int paramCount = pmd.getParameterCount(); - int paramCount = StringUtils.countMatches(hiveQL, "?"); - - if (paramCount > 0) { - loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes()); - } - - // Parse hiveQL and extract input/output tables - try { - tableNames.addAll(findTableNames(hiveQL)); - } catch (Exception e) { - // If failed to parse the query, just log a warning message, but continue. - getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e); - } - - // Execute the statement - stmt.execute(); - fc.proceed(); - } - } - } - - // Emit a Provenance SEND event - final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos); - - final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames)); - session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true); - result.routeTo(flowFile, REL_SUCCESS); - - }, onFlowFileError(context, session, result)); - - }; - - private OnError onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) { - OnError onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY); - onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> { - switch (r.destination()) { - case Failure: - getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e); - break; - case Retry: - getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry", - new Object[] {i, e}, e); - break; - case Self: - getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e); - break; - } - }); - return RollbackOnFailure.createOnError(onFlowFileError); - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean(); - final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); - final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue(); - final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter); - RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext)); - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveStreaming.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveStreaming.java deleted file mode 100644 index 7511e44e2e..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/PutHiveStreaming.java +++ /dev/null @@ -1,1216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.file.CodecFactory; -import org.apache.avro.file.DataFileConstants; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.file.SeekableByteArrayInput; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.hcatalog.streaming.ConnectionError; -import org.apache.hive.hcatalog.streaming.HiveEndPoint; -import org.apache.hive.hcatalog.streaming.SerializationError; -import org.apache.hive.hcatalog.streaming.StreamingException; -import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.annotation.lifecycle.OnStopped; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.components.Validator; -import org.apache.nifi.components.resource.ResourceCardinality; -import org.apache.nifi.components.resource.ResourceType; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.kerberos.KerberosCredentialsService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.AbstractSessionFactoryProcessor; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.io.InputStreamCallback; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.DiscontinuedException; -import org.apache.nifi.processor.util.pattern.ErrorTypes; -import org.apache.nifi.processor.util.pattern.ExceptionHandler; -import org.apache.nifi.processor.util.pattern.RollbackOnFailure; -import org.apache.nifi.processor.util.pattern.RoutingResult; -import org.apache.nifi.security.krb.KerberosKeytabUser; -import org.apache.nifi.security.krb.KerberosLoginException; -import org.apache.nifi.security.krb.KerberosPasswordUser; -import org.apache.nifi.security.krb.KerberosUser; -import org.apache.nifi.util.hive.AuthenticationFailedException; -import org.apache.nifi.util.hive.HiveConfigurator; -import org.apache.nifi.util.hive.HiveOptions; -import org.apache.nifi.util.hive.HiveUtils; -import org.apache.nifi.util.hive.HiveWriter; -import org.apache.nifi.util.hive.ValidationResources; -import org.xerial.snappy.Snappy; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Semaphore; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiFunction; -import java.util.regex.Pattern; - -@Tags({"hive", "streaming", "put", "database", "store"}) -@CapabilityDescription("This processor uses Hive Streaming to send flow file data to an Apache Hive table. The incoming flow file is expected to be in " - + "Avro format and the table must exist in Hive. Please see the Hive documentation for requirements on the Hive table (format, partitions, etc.). " - + "The partition values are extracted from the Avro record based on the names of the partition columns as specified in the processor. NOTE: If " - + "multiple concurrent tasks are configured for this processor, only one table can be written to at any time by a single thread. Additional tasks " - + "intending to write to the same table will wait for the current task to finish writing to the table.") -@WritesAttributes({ - @WritesAttribute(attribute = "hivestreaming.record.count", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the number of records from the incoming flow file written successfully and unsuccessfully, respectively."), - @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the target table name in 'databaseName.tableName' format.") -}) -@RequiresInstanceClassLoading -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.PutHive3Streaming") -public class PutHiveStreaming extends AbstractSessionFactoryProcessor { - private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB"; - - // Attributes - public static final String HIVE_STREAMING_RECORD_COUNT_ATTR = "hivestreaming.record.count"; - - private static final String CLIENT_CACHE_DISABLED_PROPERTY = "hcatalog.hive.client.cache.disabled"; - - // Validators - private static final Validator GREATER_THAN_ONE_VALIDATOR = (subject, value, context) -> { - if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value)) { - return new ValidationResult.Builder().subject(subject).input(value).explanation("Expression Language Present").valid(true).build(); - } - - String reason = null; - try { - final int intVal = Integer.parseInt(value); - - if (intVal < 2) { - reason = "value is less than 2"; - } - } catch (final NumberFormatException e) { - reason = "value is not a valid integer"; - } - - return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null).build(); - }; - - // Metadata keys that are not transferred to split files when output strategy is datafile - // Avro will write this key/values pairs on its own - private static final Set RESERVED_METADATA; - - static { - // This is used to prevent a race condition in Snappy 1.0.5 where two classloaders could - // try to define the native loader class at the same time, causing an error. Make a no-op - // call here to ensure Snappy's static initializers are called. Note that this block is - // called once by the extensions loader before any actual processor instances are created, - // so the race condition will not occur, and for each other instance, this is a no-op - try { - Snappy.compress(""); - } catch (IOException ioe) { - // Do nothing here, should never happen as it is intended to be a no-op - } - - Set reservedMetadata = new HashSet<>(); - reservedMetadata.add("avro.schema"); - reservedMetadata.add("avro.codec"); - RESERVED_METADATA = Collections.unmodifiableSet(reservedMetadata); - } - - // Properties - public static final PropertyDescriptor METASTORE_URI = new PropertyDescriptor.Builder() - .name("hive-stream-metastore-uri") - .displayName("Hive Metastore URI") - .description("The URI location for the Hive Metastore. Note that this is not the location of the Hive Server. The default port for the " - + "Hive metastore is 9043.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.URI_VALIDATOR) - .addValidator(StandardValidators.createRegexMatchingValidator(Pattern.compile("(^[^/]+.*[^/]+$|^[^/]+$|^$)"))) // no start with / or end with / - .build(); - - public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() - .name("hive-config-resources") - .displayName("Hive Configuration Resources") - .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop " - + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication " - + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Also note that if Max Concurrent Tasks is set " - + "to a number greater than one, the 'hcatalog.hive.client.cache.disabled' property will be forced to 'true' to avoid concurrency issues. " - + "Please see the Hive documentation for more details.") - .required(false) - .identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor DB_NAME = new PropertyDescriptor.Builder() - .name("hive-stream-database-name") - .displayName("Database Name") - .description("The name of the database in which to put the data.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - public static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder() - .name("hive-stream-table-name") - .displayName("Table Name") - .description("The name of the database table in which to put the data.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - public static final PropertyDescriptor PARTITION_COLUMNS = new PropertyDescriptor.Builder() - .name("hive-stream-partition-cols") - .displayName("Partition Columns") - .description("A comma-delimited list of column names on which the table has been partitioned. The order of values in this list must " - + "correspond exactly to the order of partition columns specified during the table creation.") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .addValidator(StandardValidators.createRegexMatchingValidator(Pattern.compile("[^,]+(,[^,]+)*"))) // comma-separated list with non-empty entries - .build(); - - public static final PropertyDescriptor AUTOCREATE_PARTITIONS = new PropertyDescriptor.Builder() - .name("hive-stream-autocreate-partition") - .displayName("Auto-Create Partitions") - .description("Flag indicating whether partitions should be automatically created") - .required(true) - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .allowableValues("true", "false") - .defaultValue("true") - .build(); - - public static final PropertyDescriptor MAX_OPEN_CONNECTIONS = new PropertyDescriptor.Builder() - .name("hive-stream-max-open-connections") - .displayName("Max Open Connections") - .description("The maximum number of open connections that can be allocated from this pool at the same time, " - + "or negative for no limit.") - .defaultValue("8") - .required(true) - .addValidator(StandardValidators.INTEGER_VALIDATOR) - .sensitive(false) - .build(); - - public static final PropertyDescriptor HEARTBEAT_INTERVAL = new PropertyDescriptor.Builder() - .name("hive-stream-heartbeat-interval") - .displayName("Heartbeat Interval") - .description("Indicates that a heartbeat should be sent when the specified number of seconds has elapsed. " - + "A value of 0 indicates that no heartbeat should be sent. " - + "Note that although this property supports Expression Language, it will not be evaluated against incoming FlowFile attributes.") - .defaultValue("60") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor TXNS_PER_BATCH = new PropertyDescriptor.Builder() - .name("hive-stream-transactions-per-batch") - .displayName("Transactions per Batch") - .description("A hint to Hive Streaming indicating how many transactions the processor task will need. This value must be greater than 1.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(GREATER_THAN_ONE_VALIDATOR) - .defaultValue("100") - .build(); - - public static final PropertyDescriptor RECORDS_PER_TXN = new PropertyDescriptor.Builder() - .name("hive-stream-records-per-transaction") - .displayName("Records per Transaction") - .description("Number of records to process before committing the transaction. This value must be greater than 1.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(GREATER_THAN_ONE_VALIDATOR) - .defaultValue("10000") - .build(); - - public static final PropertyDescriptor CALL_TIMEOUT = new PropertyDescriptor.Builder() - .name("hive-stream-call-timeout") - .displayName("Call Timeout") - .description("The number of seconds allowed for a Hive Streaming operation to complete. A value of 0 indicates the processor should wait indefinitely on operations. " - + "Note that although this property supports Expression Language, it will not be evaluated against incoming FlowFile attributes.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor ROLLBACK_ON_FAILURE = RollbackOnFailure.createRollbackOnFailureProperty( - "NOTE: When an error occurred after a Hive streaming transaction which is derived from the same input FlowFile is already committed," + - " (i.e. a FlowFile contains more records than 'Records per Transaction' and a failure occurred at the 2nd transaction or later)" + - " then the succeeded records will be transferred to 'success' relationship while the original input FlowFile stays in incoming queue." + - " Duplicated records can be created for the succeeded ones when the same FlowFile is processed again."); - - static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder() - .name("kerberos-credentials-service") - .displayName("Kerberos Credentials Service") - .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos") - .identifiesControllerService(KerberosCredentialsService.class) - .required(false) - .build(); - - // Relationships - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile containing Avro records routed to this relationship after the record has been successfully transmitted to Hive.") - .build(); - - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile containing Avro records routed to this relationship if the record could not be transmitted to Hive.") - .build(); - - public static final Relationship REL_RETRY = new Relationship.Builder() - .name("retry") - .description("The incoming FlowFile is routed to this relationship if its records cannot be transmitted to Hive. Note that " - + "some records may have been processed successfully, they will be routed (as Avro flow files) to the success relationship. " - + "The combination of the retry, success, and failure relationships indicate how many records succeeded and/or failed. This " - + "can be used to provide a retry capability since full rollback is not possible.") - .build(); - - private List propertyDescriptors; - private Set relationships; - - protected KerberosProperties kerberosProperties; - private volatile File kerberosConfigFile = null; - - protected volatile HiveConfigurator hiveConfigurator = new HiveConfigurator(); - protected volatile UserGroupInformation ugi; - final protected AtomicReference kerberosUserReference = new AtomicReference<>(); - protected volatile HiveConf hiveConfig; - - protected final AtomicBoolean sendHeartBeat = new AtomicBoolean(false); - - protected volatile int callTimeout; - protected ExecutorService callTimeoutPool; - protected transient Timer heartBeatTimer; - - protected volatile ConcurrentLinkedQueue> threadWriterList = new ConcurrentLinkedQueue<>(); - protected volatile ConcurrentHashMap tableSemaphoreMap = new ConcurrentHashMap<>(); - - // Holder of cached Configuration information so validation does not reload the same config over and over - private final AtomicReference validationResourceHolder = new AtomicReference<>(); - - @Override - protected void init(ProcessorInitializationContext context) { - List props = new ArrayList<>(); - props.add(METASTORE_URI); - props.add(HIVE_CONFIGURATION_RESOURCES); - props.add(DB_NAME); - props.add(TABLE_NAME); - props.add(PARTITION_COLUMNS); - props.add(AUTOCREATE_PARTITIONS); - props.add(MAX_OPEN_CONNECTIONS); - props.add(HEARTBEAT_INTERVAL); - props.add(TXNS_PER_BATCH); - props.add(RECORDS_PER_TXN); - props.add(CALL_TIMEOUT); - props.add(ROLLBACK_ON_FAILURE); - props.add(KERBEROS_CREDENTIALS_SERVICE); - - kerberosConfigFile = context.getKerberosConfigurationFile(); - kerberosProperties = new KerberosProperties(kerberosConfigFile); - props.add(kerberosProperties.getKerberosPrincipal()); - props.add(kerberosProperties.getKerberosKeytab()); - props.add(kerberosProperties.getKerberosPassword()); - propertyDescriptors = Collections.unmodifiableList(props); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - _relationships.add(REL_RETRY); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - - @Override - protected Collection customValidate(final ValidationContext validationContext) { - boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet(); - - final List problems = new ArrayList<>(); - - if (confFileProvided) { - final String explicitPrincipal = validationContext.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue(); - final String explicitKeytab = validationContext.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue(); - final String explicitPassword = validationContext.getProperty(kerberosProperties.getKerberosPassword()).getValue(); - final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService == null) { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = explicitKeytab; - } else { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } - - - final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, explicitPassword, validationResourceHolder, getLogger())); - - if (credentialsService != null && (explicitPrincipal != null || explicitKeytab != null || explicitPassword != null)) { - problems.add(new ValidationResult.Builder() - .subject("Kerberos Credentials") - .valid(false) - .explanation("Cannot specify a Kerberos Credentials Service while also specifying a Kerberos Principal, Kerberos Keytab, or Kerberos Password") - .build()); - } - - if (!isAllowExplicitKeytab() && explicitKeytab != null) { - problems.add(new ValidationResult.Builder() - .subject("Kerberos Credentials") - .valid(false) - .explanation("The '" + ALLOW_EXPLICIT_KEYTAB + "' system environment variable is configured to forbid explicitly configuring Kerberos Keytab in processors. " - + "The Kerberos Credentials Service should be used instead of setting the Kerberos Keytab or Kerberos Principal property.") - .build()); - } - } - - return problems; - } - - @OnScheduled - public void setup(final ProcessContext context) { - ComponentLog log = getLogger(); - - final Integer heartbeatInterval = context.getProperty(HEARTBEAT_INTERVAL).evaluateAttributeExpressions().asInteger(); - final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles); - - // If more than one concurrent task, force 'hcatalog.hive.client.cache.disabled' to true - if(context.getMaxConcurrentTasks() > 1) { - hiveConfig.setBoolean(CLIENT_CACHE_DISABLED_PROPERTY, true); - } - - // add any dynamic properties to the Hive configuration - for (final Map.Entry entry : context.getProperties().entrySet()) { - final PropertyDescriptor descriptor = entry.getKey(); - if (descriptor.isDynamic()) { - hiveConfig.set(descriptor.getName(), entry.getValue()); - } - } - - hiveConfigurator.preload(hiveConfig); - - if (SecurityUtil.isSecurityEnabled(hiveConfig)) { - final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue(); - final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue(); - final String explicitPassword = context.getProperty(kerberosProperties.getKerberosPassword()).getValue(); - final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService == null) { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = explicitKeytab; - } else { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } - - if (resolvedKeytab != null) { - kerberosUserReference.set(new KerberosKeytabUser(resolvedPrincipal, resolvedKeytab)); - log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab}); - } else if (explicitPassword != null) { - kerberosUserReference.set(new KerberosPasswordUser(resolvedPrincipal, explicitPassword)); - log.info("Hive Security Enabled, logging in as principal {} with password", new Object[] {resolvedPrincipal}); - } else { - throw new ProcessException("Unable to authenticate with Kerberos, no keytab or password was provided"); - } - - try { - ugi = hiveConfigurator.authenticate(hiveConfig, kerberosUserReference.get()); - } catch (AuthenticationFailedException ae) { - throw new ProcessException("Kerberos authentication failed for Hive Streaming", ae); - } - - log.info("Successfully logged in as principal " + resolvedPrincipal); - } else { - ugi = null; - kerberosUserReference.set(null); - } - - callTimeout = context.getProperty(CALL_TIMEOUT).evaluateAttributeExpressions().asInteger() * 1000; // milliseconds - String timeoutName = "put-hive-streaming-%d"; - this.callTimeoutPool = Executors.newFixedThreadPool(1, - new ThreadFactoryBuilder().setNameFormat(timeoutName).build()); - - sendHeartBeat.set(true); - heartBeatTimer = new Timer(); - setupHeartBeatTimer(heartbeatInterval); - } - - private static class FunctionContext extends RollbackOnFailure { - - private AtomicReference successFlowFile; - private AtomicReference failureFlowFile; - private final DataFileWriter successAvroWriter = new DataFileWriter<>(new GenericDatumWriter()); - private final DataFileWriter failureAvroWriter = new DataFileWriter<>(new GenericDatumWriter()); - private byte[] successAvroHeader; - private byte[] failureAvroHeader; - - private final AtomicInteger recordCount = new AtomicInteger(0); - private final AtomicInteger successfulRecordCount = new AtomicInteger(0); - private final AtomicInteger failedRecordCount = new AtomicInteger(0); - - private final ComponentLog logger; - - /** - * It's possible that multiple Hive streaming transactions are committed within a single onTrigger. - * PutHiveStreaming onTrigger is not 'transactional' in a sense of RollbackOnFailure. - * Once a Hive streaming transaction is committed, processor session will not be rolled back. - * @param rollbackOnFailure whether process session should be rolled back if failed - */ - private FunctionContext(boolean rollbackOnFailure, ComponentLog logger) { - super(rollbackOnFailure, false); - this.logger = logger; - } - - private void setFlowFiles(FlowFile successFlowFile, FlowFile failureFlowFile) { - this.successFlowFile = new AtomicReference<>(successFlowFile); - this.failureFlowFile = new AtomicReference<>(failureFlowFile); - } - - private byte[] initAvroWriter(ProcessSession session, String codec, DataFileStream reader, - DataFileWriter writer, AtomicReference flowFileRef) { - - writer.setCodec(CodecFactory.fromString(codec)); - // Transfer metadata (this is a subset of the incoming file) - for (String metaKey : reader.getMetaKeys()) { - if (!RESERVED_METADATA.contains(metaKey)) { - writer.setMeta(metaKey, reader.getMeta(metaKey)); - } - } - - final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream(); - flowFileRef.set(session.append(flowFileRef.get(), (out) -> { - // Create writer so that records can be appended later. - writer.create(reader.getSchema(), avroHeader); - writer.close(); - - final byte[] header = avroHeader.toByteArray(); - out.write(header); - })); - - // Capture the Avro header byte array that is just written to the FlowFile. - // This is needed when Avro records are appended to the same FlowFile. - return avroHeader.toByteArray(); - } - - private void initAvroWriters(ProcessSession session, String codec, DataFileStream reader) { - successAvroHeader = initAvroWriter(session, codec, reader, successAvroWriter, successFlowFile); - failureAvroHeader = initAvroWriter(session, codec, reader, failureAvroWriter, failureFlowFile); - } - - private void appendAvroRecords(ProcessSession session, byte[] avroHeader, DataFileWriter writer, - AtomicReference flowFileRef, List hRecords) { - - flowFileRef.set(session.append(flowFileRef.get(), (out) -> { - if (hRecords != null) { - // Initialize the writer again as append mode, so that Avro header is written only once. - writer.appendTo(new SeekableByteArrayInput(avroHeader), out); - try { - for (HiveStreamingRecord hRecord : hRecords) { - writer.append(hRecord.getRecord()); - } - } catch (IOException ioe) { - // The records were put to Hive Streaming successfully, but there was an error while writing the - // Avro records to the flow file. Log as an error and move on. - logger.error("Error writing Avro records (which were sent successfully to Hive Streaming) to the flow file, " + ioe, ioe); - } - } - writer.close(); - })); - } - - private void appendRecordsToSuccess(ProcessSession session, List records) { - appendAvroRecords(session, successAvroHeader, successAvroWriter, successFlowFile, records); - successfulRecordCount.addAndGet(records.size()); - } - - private void appendRecordsToFailure(ProcessSession session, List records) { - appendAvroRecords(session, failureAvroHeader, failureAvroWriter, failureFlowFile, records); - failedRecordCount.addAndGet(records.size()); - } - - private void transferFlowFiles(ProcessSession session, RoutingResult result, HiveOptions options) { - - if (successfulRecordCount.get() > 0) { - // Transfer the flow file with successful records - Map updateAttributes = new HashMap<>(); - updateAttributes.put(HIVE_STREAMING_RECORD_COUNT_ATTR, Integer.toString(successfulRecordCount.get())); - updateAttributes.put(AbstractHiveQLProcessor.ATTR_OUTPUT_TABLES, options.getQualifiedTableName()); - successFlowFile.set(session.putAllAttributes(successFlowFile.get(), updateAttributes)); - session.getProvenanceReporter().send(successFlowFile.get(), options.getMetaStoreURI()); - result.routeTo(successFlowFile.get(), REL_SUCCESS); - } else { - session.remove(successFlowFile.get()); - } - - if (failedRecordCount.get() > 0) { - // There were some failed records, so transfer that flow file to failure - Map updateAttributes = new HashMap<>(); - updateAttributes.put(HIVE_STREAMING_RECORD_COUNT_ATTR, Integer.toString(failedRecordCount.get())); - updateAttributes.put(AbstractHiveQLProcessor.ATTR_OUTPUT_TABLES, options.getQualifiedTableName()); - failureFlowFile.set(session.putAllAttributes(failureFlowFile.get(), updateAttributes)); - result.routeTo(failureFlowFile.get(), REL_FAILURE); - } else { - session.remove(failureFlowFile.get()); - } - - result.getRoutedFlowFiles().forEach((relationship, flowFiles) -> { - session.transfer(flowFiles, relationship); - }); - } - - } - - private static class ShouldRetryException extends RuntimeException { - private ShouldRetryException(String message, Throwable cause) { - super(message, cause); - } - } - - private ExceptionHandler.OnError> onHiveRecordsError(ProcessContext context, ProcessSession session, Map writers) { - return RollbackOnFailure.createOnError((fc, input, res, e) -> { - - if (res.penalty() == ErrorTypes.Penalty.Yield) { - context.yield(); - } - - switch (res.destination()) { - case Failure: - // Add the failed record to the failure flow file - getLogger().error(String.format("Error writing %s to Hive Streaming transaction due to %s", input, e), e); - fc.appendRecordsToFailure(session, input); - break; - - case Retry: - // If we can't connect to the endpoint, exit the loop and let the outer exception handler route the original flow file to retry - abortAndCloseWriters(writers); - throw new ShouldRetryException("Hive Streaming connect/write error, flow file will be penalized and routed to retry. " + e, e); - - case Self: - getLogger().error(String.format("Error writing %s to Hive Streaming transaction due to %s", input, e), e); - abortAndCloseWriters(writers); - break; - - default: - abortAndCloseWriters(writers); - if (e instanceof ProcessException) { - throw (ProcessException) e; - } else { - throw new ProcessException(String.format("Error writing %s to Hive Streaming transaction due to %s", input, e), e); - } - } - }); - } - - private ExceptionHandler.OnError onHiveRecordError(ProcessContext context, ProcessSession session, Map writers) { - return (fc, input, res, e) -> onHiveRecordsError(context, session, writers).apply(fc, Collections.singletonList(input), res, e); - } - - private ExceptionHandler.OnError onRecordError(ProcessContext context, ProcessSession session, Map writers) { - return (fc, input, res, e) -> onHiveRecordError(context, session, writers).apply(fc, new HiveStreamingRecord(null, input), res, e); - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - final FunctionContext functionContext = new FunctionContext(context.getProperty(ROLLBACK_ON_FAILURE).asBoolean(), getLogger()); - RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> onTrigger(context, session, functionContext)); - } - - private void onTrigger(ProcessContext context, ProcessSession session, FunctionContext functionContext) throws ProcessException { - FlowFile flowFile = session.get(); - if (flowFile == null) { - return; - } - - final String dbName = context.getProperty(DB_NAME).evaluateAttributeExpressions(flowFile).getValue(); - final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue(); - - // Only allow one thread to work on a DB/table at a time - final Semaphore newSemaphore = new Semaphore(1); - Semaphore semaphore = tableSemaphoreMap.putIfAbsent(dbName + "." + tableName, newSemaphore); - if (semaphore == null) { - semaphore = newSemaphore; - } - - boolean gotSemaphore = false; - try { - gotSemaphore = semaphore.tryAcquire(0, TimeUnit.SECONDS); - } catch (InterruptedException ie) { - // Nothing to do, gotSemaphore defaults to false - } - if (!gotSemaphore) { - // We didn't get a chance to acquire, so rollback the session and try again next time - session.rollback(); - return; - } - - final ComponentLog log = getLogger(); - final String metastoreUri = context.getProperty(METASTORE_URI).evaluateAttributeExpressions(flowFile).getValue(); - - final boolean autoCreatePartitions = context.getProperty(AUTOCREATE_PARTITIONS).asBoolean(); - final Integer maxConnections = context.getProperty(MAX_OPEN_CONNECTIONS).asInteger(); - final Integer heartbeatInterval = context.getProperty(HEARTBEAT_INTERVAL).evaluateAttributeExpressions().asInteger(); - final Integer txnsPerBatch = context.getProperty(TXNS_PER_BATCH).evaluateAttributeExpressions(flowFile).asInteger(); - final Integer recordsPerTxn = context.getProperty(RECORDS_PER_TXN).evaluateAttributeExpressions(flowFile).asInteger(); - - final Map myWriters = new ConcurrentHashMap<>(); - threadWriterList.add(myWriters); - - HiveOptions o = new HiveOptions(metastoreUri, dbName, tableName) - .withTxnsPerBatch(txnsPerBatch) - .withAutoCreatePartitions(autoCreatePartitions) - .withMaxOpenConnections(maxConnections) - .withHeartBeatInterval(heartbeatInterval) - .withCallTimeout(callTimeout); - - if (SecurityUtil.isSecurityEnabled(hiveConfig)) { - final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue(); - final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue(); - final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService == null) { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = explicitKeytab; - } else { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } - - o = o.withKerberosPrincipal(resolvedPrincipal).withKerberosKeytab(resolvedKeytab); - } - - final HiveOptions options = o; - - // Store the original class loader, then explicitly set it to this class's classloader (for use by the Hive Metastore) - ClassLoader originalClassloader = Thread.currentThread().getContextClassLoader(); - Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); - - final List partitionColumnList; - final String partitionColumns = context.getProperty(PARTITION_COLUMNS).evaluateAttributeExpressions().getValue(); - if (partitionColumns == null || partitionColumns.isEmpty()) { - partitionColumnList = Collections.emptyList(); - } else { - String[] partitionCols = partitionColumns.split(","); - partitionColumnList = new ArrayList<>(partitionCols.length); - for (String col : partitionCols) { - partitionColumnList.add(col.trim()); - } - } - - final AtomicReference> successfulRecords = new AtomicReference<>(); - successfulRecords.set(new ArrayList<>()); - final FlowFile inputFlowFile = flowFile; - - final RoutingResult result = new RoutingResult(); - final ExceptionHandler exceptionHandler = new ExceptionHandler<>(); - exceptionHandler.mapException(s -> { - try { - if (s == null) { - return ErrorTypes.PersistentFailure; - } - throw s; - - } catch (IllegalArgumentException - | AvroRuntimeException - | HiveWriter.WriteFailure - | SerializationError inputError) { - - return ErrorTypes.InvalidInput; - - } catch (HiveWriter.CommitFailure - | HiveWriter.TxnBatchFailure - | HiveWriter.TxnFailure writerTxError) { - - return ErrorTypes.TemporalInputFailure; - - } catch (ConnectionError - | HiveWriter.ConnectFailure connectionError) { - // Can't connect to Hive endpoint. - log.error("Error connecting to Hive endpoint: table {} at {}", - new Object[]{options.getTableName(), options.getMetaStoreURI()}); - - return ErrorTypes.TemporalFailure; - - } catch (IOException - | InterruptedException tempError) { - return ErrorTypes.TemporalFailure; - - } catch (Exception t) { - return ErrorTypes.UnknownFailure; - } - }); - final BiFunction adjustError = RollbackOnFailure.createAdjustError(getLogger()); - exceptionHandler.adjustError(adjustError); - - // Create output flow files and their Avro writers - functionContext.setFlowFiles(session.create(inputFlowFile), session.create(inputFlowFile)); - - try { - session.read(inputFlowFile, new InputStreamCallback() { - @Override - public void process(InputStream in) throws IOException { - try (final DataFileStream reader = new DataFileStream<>(in, new GenericDatumReader())) { - - GenericRecord currRecord = null; - - // Copy codec and schema information to all writers - final String codec = reader.getMetaString(DataFileConstants.CODEC) == null - ? DataFileConstants.NULL_CODEC - : reader.getMetaString(DataFileConstants.CODEC); - - functionContext.initAvroWriters(session, codec, reader); - - Runnable flushSuccessfulRecords = () -> { - // Now send the records to the successful FlowFile and update the success count - functionContext.appendRecordsToSuccess(session, successfulRecords.get()); - // Clear the list of successful records, we'll use it at the end when we flush whatever records are left - successfulRecords.set(new ArrayList<>()); - }; - - while (reader.hasNext()) { - // We can NOT reuse currRecord here, because currRecord is accumulated in successful records. - // If we use the same GenericRecord instance, every record ends up having the same contents. - // To avoid this, we need to create a brand new GenericRecord instance here each time. - currRecord = reader.next(); - functionContext.recordCount.incrementAndGet(); - - // Extract the partition values (they must be put separately into the Hive Streaming API) - List partitionValues = new ArrayList<>(); - - if (!exceptionHandler.execute(functionContext, currRecord, input -> { - for (String partition : partitionColumnList) { - Object partitionValue = input.get(partition); - if (partitionValue == null) { - throw new IllegalArgumentException("Partition column '" + partition + "' not found in Avro record"); - } - partitionValues.add(partitionValue.toString()); - } - }, onRecordError(context, session, myWriters))) { - continue; - } - - final HiveStreamingRecord record = new HiveStreamingRecord(partitionValues, currRecord); - final AtomicReference hiveWriterRef = new AtomicReference<>(); - - // Write record to Hive streaming - if (!exceptionHandler.execute(functionContext, record, input -> { - - final HiveEndPoint endPoint = makeHiveEndPoint(record.getPartitionValues(), options); - final HiveWriter hiveWriter = getOrCreateWriter(myWriters, options, endPoint); - hiveWriterRef.set(hiveWriter); - - hiveWriter.write(record.getRecord().toString().getBytes(StandardCharsets.UTF_8)); - successfulRecords.get().add(record); - - }, onHiveRecordError(context, session, myWriters))) { - continue; - } - - // If we've reached the records-per-transaction limit, flush the Hive Writer and update the Avro Writer for successful records - final HiveWriter hiveWriter = hiveWriterRef.get(); - if (hiveWriter.getTotalRecords() >= recordsPerTxn) { - exceptionHandler.execute(functionContext, successfulRecords.get(), input -> { - - hiveWriter.flush(true); - // Proceed function context. Process session can't be rollback anymore. - functionContext.proceed(); - - // Now send the records to the success relationship and update the success count - flushSuccessfulRecords.run(); - - }, onHiveRecordsError(context, session, myWriters).andThen((fc, input, res, commitException) -> { - // Reset hiveWriter for succeeding records. - switch (res.destination()) { - case Retry: - case Failure: - try { - // Abort current tx and move to next. - hiveWriter.abort(); - } catch (Exception e) { - // Can't even abort properly, throw a process exception - throw new ProcessException(e); - } - } - })); - } - } - - exceptionHandler.execute(functionContext, successfulRecords.get(), input -> { - // Finish any transactions - flushAllWriters(myWriters, true); - closeAllWriters(myWriters); - - // Now send any remaining records to the success relationship and update the count - flushSuccessfulRecords.run(); - - // Append successfulRecords on failure. - }, onHiveRecordsError(context, session, myWriters)); - - } catch (IOException ioe) { - // The Avro file is invalid (or may not be an Avro file at all), send it to failure - final ErrorTypes.Result adjusted = adjustError.apply(functionContext, ErrorTypes.InvalidInput); - final String msg = "The incoming flow file can not be read as an Avro file"; - switch (adjusted.destination()) { - case Failure: - log.error(msg, ioe); - result.routeTo(inputFlowFile, REL_FAILURE); - break; - case ProcessException: - throw new ProcessException(msg, ioe); - - } - } - } - }); - - // If we got here, we've processed the outgoing flow files correctly, so remove the incoming one if necessary - if (result.getRoutedFlowFiles().values().stream().noneMatch(routed -> routed.contains(inputFlowFile))) { - session.remove(inputFlowFile); - } - - } catch (DiscontinuedException e) { - // The input FlowFile processing is discontinued. Keep it in the input queue. - getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e); - result.routeTo(flowFile, Relationship.SELF); - - } catch (ShouldRetryException e) { - // This exception is already a result of adjusting an error, so simply transfer the FlowFile to retry. - getLogger().error(e.getMessage(), e); - flowFile = session.penalize(flowFile); - result.routeTo(flowFile, REL_RETRY); - - } finally { - threadWriterList.remove(myWriters); - functionContext.transferFlowFiles(session, result, options); - // Restore original class loader, might not be necessary but is good practice since the processor task changed it - Thread.currentThread().setContextClassLoader(originalClassloader); - semaphore.release(); - } - } - - @OnStopped - public void cleanup() { - validationResourceHolder.set(null); // trigger re-validation of resources - - ComponentLog log = getLogger(); - sendHeartBeat.set(false); - for(Map allWriters : threadWriterList) { - for (Map.Entry entry : allWriters.entrySet()) { - try { - HiveWriter w = entry.getValue(); - w.flushAndClose(); - } catch (Exception ex) { - log.warn("Error while closing writer to " + entry.getKey() + ". Exception follows.", ex); - if (ex instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } - } - } - allWriters.clear(); - } - - if (callTimeoutPool != null) { - callTimeoutPool.shutdown(); - try { - while (!callTimeoutPool.isTerminated()) { - callTimeoutPool.awaitTermination(callTimeout, TimeUnit.MILLISECONDS); - } - } catch (Throwable t) { - log.warn("shutdown interrupted on " + callTimeoutPool, t); - } - callTimeoutPool = null; - } - - ugi = null; - kerberosUserReference.set(null); - } - - private void setupHeartBeatTimer(int heartbeatInterval) { - if (heartbeatInterval > 0) { - final ComponentLog log = getLogger(); - heartBeatTimer.schedule(new TimerTask() { - @Override - public void run() { - try { - if (sendHeartBeat.get()) { - log.debug("Start sending heartbeat on all writers"); - sendHeartBeatOnAllWriters(); - setupHeartBeatTimer(heartbeatInterval); - } - } catch (Exception e) { - log.warn("Failed to heartbeat on HiveWriter ", e); - } - } - }, heartbeatInterval * 1000); - } - } - - private void sendHeartBeatOnAllWriters() throws InterruptedException { - for(Map allWriters : threadWriterList) { - for (HiveWriter writer : allWriters.values()) { - writer.heartBeat(); - } - } - } - - private void flushAllWriters(Map writers, boolean rollToNext) - throws HiveWriter.CommitFailure, HiveWriter.TxnBatchFailure, HiveWriter.TxnFailure, InterruptedException { - for (HiveWriter writer : writers.values()) { - writer.flush(rollToNext); - } - } - - private void abortAndCloseWriters(Map writers) { - try { - abortAllWriters(writers); - closeAllWriters(writers); - } catch (Exception ie) { - getLogger().warn("unable to close hive connections. ", ie); - } - } - - /** - * Abort current Txn on all writers - */ - private void abortAllWriters(Map writers) throws InterruptedException, StreamingException, HiveWriter.TxnBatchFailure { - for (Map.Entry entry : writers.entrySet()) { - try { - entry.getValue().abort(); - } catch (Exception e) { - getLogger().error("Failed to abort hive transaction batch, HiveEndPoint " + entry.getValue() + " due to exception ", e); - } - } - } - - /** - * Closes all writers and remove them from cache - */ - private void closeAllWriters(Map writers) { - //1) Retire writers - for (Map.Entry entry : writers.entrySet()) { - try { - entry.getValue().close(); - } catch (Exception e) { - getLogger().warn("unable to close writers. ", e); - } - } - //2) Clear cache - writers.clear(); - } - - private HiveWriter getOrCreateWriter(Map writers, HiveOptions options, HiveEndPoint endPoint) throws HiveWriter.ConnectFailure, InterruptedException { - ComponentLog log = getLogger(); - try { - HiveWriter writer = writers.get(endPoint); - if (writer == null) { - log.debug("Creating Writer to Hive end point : " + endPoint); - writer = makeHiveWriter(endPoint, callTimeoutPool, getUgi(), options); - if (writers.size() > (options.getMaxOpenConnections() - 1)) { - log.info("cached HiveEndPoint size {} exceeded maxOpenConnections {} ", new Object[]{writers.size(), options.getMaxOpenConnections()}); - int retired = retireIdleWriters(writers, options.getIdleTimeout()); - if (retired == 0) { - retireEldestWriter(writers); - } - } - writers.put(endPoint, writer); - HiveUtils.logAllHiveEndPoints(writers); - } - return writer; - } catch (HiveWriter.ConnectFailure e) { - log.error("Failed to create HiveWriter for endpoint: " + endPoint, e); - throw e; - } - } - - /** - * Locate writer that has not been used for longest time and retire it - */ - private void retireEldestWriter(Map writers) { - ComponentLog log = getLogger(); - - log.info("Attempting close eldest writers"); - long oldestTimeStamp = System.currentTimeMillis(); - HiveEndPoint eldest = null; - for (Map.Entry entry : writers.entrySet()) { - if (entry.getValue().getLastUsed() < oldestTimeStamp) { - eldest = entry.getKey(); - oldestTimeStamp = entry.getValue().getLastUsed(); - } - } - try { - log.info("Closing least used Writer to Hive end point : " + eldest); - writers.remove(eldest).flushAndClose(); - } catch (IOException e) { - log.warn("Failed to close writer for end point: " + eldest, e); - } catch (InterruptedException e) { - log.warn("Interrupted when attempting to close writer for end point: " + eldest, e); - Thread.currentThread().interrupt(); - } catch (Exception e) { - log.warn("Interrupted when attempting to close writer for end point: " + eldest, e); - } - } - - /** - * Locate all writers past idle timeout and retire them - * - * @return number of writers retired - */ - private int retireIdleWriters(Map writers, int idleTimeout) { - ComponentLog log = getLogger(); - - log.info("Attempting to close idle HiveWriters"); - int count = 0; - long now = System.currentTimeMillis(); - ArrayList retirees = new ArrayList<>(); - - //1) Find retirement candidates - for (Map.Entry entry : writers.entrySet()) { - if (now - entry.getValue().getLastUsed() > idleTimeout) { - ++count; - retirees.add(entry.getKey()); - } - } - //2) Retire them - for (HiveEndPoint ep : retirees) { - try { - log.info("Closing idle Writer to Hive end point : {}", new Object[]{ep}); - writers.remove(ep).flushAndClose(); - } catch (IOException e) { - log.warn("Failed to close HiveWriter for end point: {}. Error: " + ep, e); - } catch (InterruptedException e) { - log.warn("Interrupted when attempting to close HiveWriter for end point: " + ep, e); - Thread.currentThread().interrupt(); - } catch (Exception e) { - log.warn("Interrupted when attempting to close HiveWriter for end point: " + ep, e); - } - } - return count; - } - - protected HiveEndPoint makeHiveEndPoint(List partitionValues, HiveOptions options) throws ConnectionError { - return HiveUtils.makeEndPoint(partitionValues, options); - } - - protected HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options) - throws HiveWriter.ConnectFailure, InterruptedException { - return HiveUtils.makeHiveWriter(endPoint, callTimeoutPool, ugi, options, hiveConfig); - } - - protected KerberosProperties getKerberosProperties() { - return kerberosProperties; - } - - UserGroupInformation getUgi() { - getLogger().trace("getting UGI instance"); - if (kerberosUserReference.get() != null) { - // if there's a KerberosUser associated with this UGI, check the TGT and relogin if it is close to expiring - KerberosUser kerberosUser = kerberosUserReference.get(); - getLogger().debug("kerberosUser is " + kerberosUser); - try { - getLogger().debug("checking TGT on kerberosUser [{}]", new Object[] {kerberosUser}); - kerberosUser.checkTGTAndRelogin(); - } catch (final KerberosLoginException e) { - throw new ProcessException("Unable to relogin with kerberos credentials for " + kerberosUser.getPrincipal(), e); - } - } else { - getLogger().debug("kerberosUser was null, will not refresh TGT with KerberosUser"); - } - return ugi; - } - - /* - * Overridable by subclasses in the same package, mainly intended for testing purposes to allow verification without having to set environment variables. - */ - boolean isAllowExplicitKeytab() { - return Boolean.parseBoolean(System.getenv(ALLOW_EXPLICIT_KEYTAB)); - } - - protected class HiveStreamingRecord { - - private List partitionValues; - private GenericRecord record; - - public HiveStreamingRecord(List partitionValues, GenericRecord record) { - this.partitionValues = partitionValues; - this.record = record; - } - - public List getPartitionValues() { - return partitionValues; - } - - public GenericRecord getRecord() { - return record; - } - - } -} - diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/SelectHiveQL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/SelectHiveQL.java deleted file mode 100644 index be17f847df..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/SelectHiveQL.java +++ /dev/null @@ -1,572 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.nifi.annotation.behavior.EventDriven; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.PartialFunctions; -import org.apache.nifi.util.StopWatch; -import org.apache.nifi.util.hive.CsvOutputOptions; -import org.apache.nifi.util.hive.HiveJdbcCommon; - -import java.nio.charset.Charset; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; -import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; -import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO; - -@EventDriven -@InputRequirement(Requirement.INPUT_ALLOWED) -@Tags({"hive", "sql", "select", "jdbc", "query", "database"}) -@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format." - + " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on " - + "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. " - + "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the " - + "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.") -@WritesAttributes({ - @WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."), - @WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."), - @WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."), - @WritesAttribute(attribute = "selecthiveql.query.duration", description = "Combined duration of the query execution time and fetch time in milliseconds. " - + "If 'Max Rows Per Flow File' is set, then this number will reflect only the fetch time for the rows in the Flow File instead of the entire result set."), - @WritesAttribute(attribute = "selecthiveql.query.executiontime", description = "Duration of the query execution time in milliseconds. " - + "This number will reflect the query execution time regardless of the 'Max Rows Per Flow File' setting."), - @WritesAttribute(attribute = "selecthiveql.query.fetchtime", description = "Duration of the result set fetch time in milliseconds. " - + "If 'Max Rows Per Flow File' is set, then this number will reflect only the fetch time for the rows in the Flow File instead of the entire result set."), - @WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set " - + "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."), - @WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of " - + "FlowFiles produced by a single ResultSet. This can be used in conjunction with the " - + "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."), - @WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of " - + "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be " - + "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order " - + "FlowFiles were produced"), - @WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.") -}) -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.SelectHive3QL") -public class SelectHiveQL extends AbstractHiveQLProcessor { - - public static final String RESULT_ROW_COUNT = "selecthiveql.row.count"; - public static final String RESULT_QUERY_DURATION = "selecthiveql.query.duration"; - public static final String RESULT_QUERY_EXECUTION_TIME = "selecthiveql.query.executiontime"; - public static final String RESULT_QUERY_FETCH_TIME = "selecthiveql.query.fetchtime"; - - // Relationships - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("Successfully created FlowFile from HiveQL query result set.") - .build(); - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.") - .build(); - - - public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder() - .name("hive-pre-query") - .displayName("HiveQL Pre-Query") - .description("A semicolon-delimited list of queries executed before the main SQL query is executed. " - + "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. " - + "Note, the results/outputs of these queries will be suppressed if successfully executed.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder() - .name("hive-query") - .displayName("HiveQL Select Query") - .description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder() - .name("hive-post-query") - .displayName("HiveQL Post-Query") - .description("A semicolon-delimited list of queries executed after the main SQL query is executed. " - + "Note, the results/outputs of these queries will be suppressed if successfully executed.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder() - .name("hive-fetch-size") - .displayName("Fetch Size") - .description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be " - + "honored and/or exact. If the value specified is zero, then the hint is ignored.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder() - .name("hive-max-rows") - .displayName("Max Rows Per Flow File") - .description("The maximum number of result rows that will be included in a single FlowFile. " + - "This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder() - .name("hive-max-frags") - .displayName("Maximum Number of Fragments") - .description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " + - "This prevents OutOfMemoryError when this processor ingests huge table.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder() - .name("csv-header") - .displayName("CSV Header") - .description("Include Header in Output") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder() - .name("csv-alt-header") - .displayName("Alternate CSV Header") - .description("Comma separated list of header fields") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder() - .name("csv-delimiter") - .displayName("CSV Delimiter") - .description("CSV Delimiter used to separate fields") - .required(true) - .defaultValue(",") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder() - .name("csv-quote") - .displayName("CSV Quote") - .description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder() - .name("csv-escape") - .displayName("CSV Escape") - .description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - - public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder() - .name("hive-output-format") - .displayName("Output Format") - .description("How to represent the records coming from Hive (Avro, CSV, e.g.)") - .required(true) - .allowableValues(AVRO, CSV) - .defaultValue(AVRO) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .build(); - - private final static List propertyDescriptors; - private final static Set relationships; - - /* - * Will ensure that the list of property descriptors is built only once. - * Will also create a Set of relationships - */ - static { - List _propertyDescriptors = new ArrayList<>(); - _propertyDescriptors.add(HIVE_DBCP_SERVICE); - _propertyDescriptors.add(HIVEQL_PRE_QUERY); - _propertyDescriptors.add(HIVEQL_SELECT_QUERY); - _propertyDescriptors.add(HIVEQL_POST_QUERY); - _propertyDescriptors.add(FETCH_SIZE); - _propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE); - _propertyDescriptors.add(MAX_FRAGMENTS); - _propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT); - _propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO); - _propertyDescriptors.add(HIVEQL_CSV_HEADER); - _propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER); - _propertyDescriptors.add(HIVEQL_CSV_DELIMITER); - _propertyDescriptors.add(HIVEQL_CSV_QUOTE); - _propertyDescriptors.add(HIVEQL_CSV_ESCAPE); - _propertyDescriptors.add(CHARSET); - propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - @OnScheduled - public void setup(ProcessContext context) { - // If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization - if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) { - final String errorString = "Either the Select Query must be specified or there must be an incoming connection " - + "providing flowfile(s) containing a SQL select query"; - getLogger().error(errorString); - throw new ProcessException(errorString); - } - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session)); - } - - private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { - FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); - FlowFile flowfile = null; - - // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. - // However, if we have no FlowFile and we have connections coming from other Processors, then - // we know that we should run only if we have a FlowFile. - if (context.hasIncomingConnection()) { - if (fileToProcess == null && context.hasNonLoopConnection()) { - return; - } - } - - final ComponentLog logger = getLogger(); - final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class); - final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); - - List preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); - List postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); - - final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); - - // Source the SQL - String hqlStatement; - - if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { - hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue(); - } else { - // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. - // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. - final StringBuilder queryContents = new StringBuilder(); - session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); - hqlStatement = queryContents.toString(); - } - - - final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger(); - final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger(); - final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() - ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() - : 0; - final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); - final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); - final StopWatch stopWatch = new StopWatch(true); - final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); - final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue(); - final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue(); - final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); - final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean(); - final String fragmentIdentifier = UUID.randomUUID().toString(); - - try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); - final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement()) - ) { - Pair failure = executeConfigStatements(con, preQueries); - if (failure != null) { - // In case of failure, assigning config query to "hqlStatement" to follow current error handling - hqlStatement = failure.getLeft(); - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw failure.getRight(); - } - if (fetchSize != null && fetchSize > 0) { - try { - st.setFetchSize(fetchSize); - } catch (SQLException se) { - // Not all drivers support this, just log the error (at debug level) and move on - logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se); - } - } - - final List resultSetFlowFiles = new ArrayList<>(); - try { - logger.debug("Executing query {}", new Object[]{hqlStatement}); - if (flowbased) { - // Hive JDBC Doesn't Support this yet: - // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); - // int paramCount = pmd.getParameterCount(); - - // Alternate way to determine number of params in SQL. - int paramCount = StringUtils.countMatches(hqlStatement, "?"); - - if (paramCount > 0) { - setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); - } - } - - final StopWatch executionTime = new StopWatch(true); - final ResultSet resultSet; - - try { - resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); - } catch (SQLException se) { - // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw se; - } - long executionTimeElapsed = executionTime.getElapsed(TimeUnit.MILLISECONDS); - - int fragmentIndex = 0; - String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; - while (true) { - final AtomicLong nrOfRows = new AtomicLong(0L); - final StopWatch fetchTime = new StopWatch(true); - - flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); - if (baseFilename == null) { - baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); - } - try { - flowfile = session.write(flowfile, out -> { - try { - if (AVRO.equals(outputFormat)) { - nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); - } else if (CSV.equals(outputFormat)) { - CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); - nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); - } else { - nrOfRows.set(0L); - throw new ProcessException("Unsupported output format: " + outputFormat); - } - } catch (final SQLException | RuntimeException e) { - throw new ProcessException("Error during database query or conversion of records.", e); - } - }); - } catch (ProcessException e) { - // Add flowfile to results before rethrowing so it will be removed from session in outer catch - resultSetFlowFiles.add(flowfile); - throw e; - } - long fetchTimeElapsed = fetchTime.getElapsed(TimeUnit.MILLISECONDS); - - if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { - final Map attributes = new HashMap<>(); - // Set attribute for how many rows were selected - attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); - - try { - // Set input/output table names by parsing the query - attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); - } catch (Exception e) { - // If failed to parse the query, just log a warning message, but continue. - getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e); - } - - // Set MIME type on output document and add extension to filename - if (AVRO.equals(outputFormat)) { - attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); - attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro"); - } else if (CSV.equals(outputFormat)) { - attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); - attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); - } - - if (maxRowsPerFlowFile > 0) { - attributes.put("fragment.identifier", fragmentIdentifier); - attributes.put("fragment.index", String.valueOf(fragmentIndex)); - } - - attributes.put(RESULT_QUERY_DURATION, String.valueOf(executionTimeElapsed + fetchTimeElapsed)); - attributes.put(RESULT_QUERY_EXECUTION_TIME, String.valueOf(executionTimeElapsed)); - attributes.put(RESULT_QUERY_FETCH_TIME, String.valueOf(fetchTimeElapsed)); - - flowfile = session.putAllAttributes(flowfile, attributes); - - logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", - new Object[]{flowfile, nrOfRows.get()}); - - if (context.hasIncomingConnection()) { - // If the flow file came from an incoming connection, issue a Fetch provenance event - session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), - "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - } else { - // If we created a flow file from rows received from Hive, issue a Receive provenance event - session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - } - resultSetFlowFiles.add(flowfile); - } else { - // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on - session.remove(flowfile); - if (resultSetFlowFiles != null && resultSetFlowFiles.size()>0) { - flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size()-1); - } - break; - } - - fragmentIndex++; - if (maxFragments > 0 && fragmentIndex >= maxFragments) { - break; - } - } - - for (int i = 0; i < resultSetFlowFiles.size(); i++) { - // Set count on all FlowFiles - if (maxRowsPerFlowFile > 0) { - resultSetFlowFiles.set(i, - session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); - } - } - - } catch (final SQLException e) { - throw e; - } - - failure = executeConfigStatements(con, postQueries); - if (failure != null) { - hqlStatement = failure.getLeft(); - if (resultSetFlowFiles != null) { - resultSetFlowFiles.forEach(ff -> session.remove(ff)); - } - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw failure.getRight(); - } - - session.transfer(resultSetFlowFiles, REL_SUCCESS); - if (fileToProcess != null) { - session.remove(fileToProcess); - } - } catch (final ProcessException | SQLException e) { - logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e}); - if (flowfile == null) { - // This can happen if any exceptions occur while setting up the connection, statement, etc. - logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", - new Object[]{hqlStatement, e}); - context.yield(); - } else { - if (context.hasIncomingConnection()) { - logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", - new Object[]{hqlStatement, flowfile, e}); - flowfile = session.penalize(flowfile); - } else { - logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", - new Object[]{hqlStatement, e}); - context.yield(); - } - session.transfer(flowfile, REL_FAILURE); - } - } - } - - /* - * Executes given queries using pre-defined connection. - * Returns null on success, or a query string if failed. - */ - protected Pair executeConfigStatements(final Connection con, final List configQueries){ - if (configQueries == null || configQueries.isEmpty()) { - return null; - } - - for (String confSQL : configQueries) { - try(final Statement st = con.createStatement()){ - st.execute(confSQL); - } catch (SQLException e) { - return Pair.of(confSQL, e); - } - } - return null; - } - - protected List getQueries(final String value) { - if (value == null || value.length() == 0 || value.trim().length() == 0) { - return null; - } - final List queries = new LinkedList<>(); - for (String query : value.split(";")) { - if (query.trim().length() > 0) { - queries.add(query.trim()); - } - } - return queries; - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHiveTable.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHiveTable.java deleted file mode 100644 index 61fe28d282..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHiveTable.java +++ /dev/null @@ -1,769 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.ReadsAttribute; -import org.apache.nifi.annotation.behavior.ReadsAttributes; -import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.components.Validator; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.AbstractProcessor; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.DiscontinuedException; -import org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException; -import org.apache.nifi.serialization.MalformedRecordException; -import org.apache.nifi.serialization.RecordReader; -import org.apache.nifi.serialization.RecordReaderFactory; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriterFactory; -import org.apache.nifi.serialization.SimpleRecordSchema; -import org.apache.nifi.serialization.WriteResult; -import org.apache.nifi.serialization.record.MapRecord; -import org.apache.nifi.serialization.record.Record; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordSchema; -import org.apache.nifi.util.StringUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; - - -@Tags({"hive", "metadata", "jdbc", "database", "table"}) -@CapabilityDescription("This processor uses a Hive JDBC connection and incoming records to generate any Hive 1.2 table changes needed to support the incoming records.") -@ReadsAttributes({ - @ReadsAttribute(attribute = "hive.table.management.strategy", description = "This attribute is read if the 'Table Management Strategy' property is configured " - + "to use the value of this attribute. The value of this attribute should correspond (ignoring case) to a valid option of the 'Table Management Strategy' property.") -}) -@WritesAttributes({ - @WritesAttribute(attribute = "output.table", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the target table name."), - @WritesAttribute(attribute = "output.path", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the path on the file system to the table (or partition location if the table is partitioned)."), - @WritesAttribute(attribute = "mime.type", description = "Sets the mime.type attribute to the MIME Type specified by the Record Writer, only if a Record Writer is specified " - + "and Update Field Names is 'true'."), - @WritesAttribute(attribute = "record.count", description = "Sets the number of records in the FlowFile, only if a Record Writer is specified and Update Field Names is 'true'.") -}) -@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) -@RequiresInstanceClassLoading -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.UpdateHive3Table") -public class UpdateHiveTable extends AbstractProcessor { - - static final String TEXTFILE = "TEXTFILE"; - static final String SEQUENCEFILE = "SEQUENCEFILE"; - static final String ORC = "ORC"; - static final String PARQUET = "PARQUET"; - static final String AVRO = "AVRO"; - static final String RCFILE = "RCFILE"; - - static final AllowableValue TEXTFILE_STORAGE = new AllowableValue(TEXTFILE, TEXTFILE, "Stored as plain text files. TEXTFILE is the default file format, unless the configuration " - + "parameter hive.default.fileformat has a different setting."); - static final AllowableValue SEQUENCEFILE_STORAGE = new AllowableValue(SEQUENCEFILE, SEQUENCEFILE, "Stored as compressed Sequence Files."); - static final AllowableValue ORC_STORAGE = new AllowableValue(ORC, ORC, "Stored as ORC file format. Supports ACID Transactions & Cost-based Optimizer (CBO). " - + "Stores column-level metadata."); - static final AllowableValue PARQUET_STORAGE = new AllowableValue(PARQUET, PARQUET, "Stored as Parquet format for the Parquet columnar storage format."); - static final AllowableValue AVRO_STORAGE = new AllowableValue(AVRO, AVRO, "Stored as Avro format."); - static final AllowableValue RCFILE_STORAGE = new AllowableValue(RCFILE, RCFILE, "Stored as Record Columnar File format."); - - static final AllowableValue CREATE_IF_NOT_EXISTS = new AllowableValue("Create If Not Exists", "Create If Not Exists", - "Create a table with the given schema if it does not already exist"); - static final AllowableValue FAIL_IF_NOT_EXISTS = new AllowableValue("Fail If Not Exists", "Fail If Not Exists", - "If the target does not already exist, log an error and route the flowfile to failure"); - - static final String TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE = "hive.table.management.strategy"; - static final AllowableValue MANAGED_TABLE = new AllowableValue("Managed", "Managed", - "Any tables created by this processor will be managed tables (see Hive documentation for details)."); - static final AllowableValue EXTERNAL_TABLE = new AllowableValue("External", "External", - "Any tables created by this processor will be external tables located at the `External Table Location` property value."); - static final AllowableValue ATTRIBUTE_DRIVEN_TABLE = new AllowableValue("Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute", - "Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute", - "Inspects the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' FlowFile attribute to determine the table management strategy. The value " - + "of this attribute must be a case-insensitive match to one of the other allowable values (Managed, External, e.g.)."); - - static final String ATTR_OUTPUT_TABLE = "output.table"; - static final String ATTR_OUTPUT_PATH = "output.path"; - - // Properties - static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder() - .name("record-reader") - .displayName("Record Reader") - .description("The service for reading incoming flow files. The reader is only used to determine the schema of the records, the actual records will not be processed.") - .identifiesControllerService(RecordReaderFactory.class) - .required(true) - .build(); - - static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder() - .name("hive-dbcp-service") - .displayName("Hive Database Connection Pooling Service") - .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database") - .required(true) - .identifiesControllerService(HiveDBCPService.class) - .build(); - - static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder() - .name("hive-table-name") - .displayName("Table Name") - .description("The name of the database table to update. If the table does not exist, then it will either be created or an error thrown, depending " - + "on the value of the Create Table property.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - static final PropertyDescriptor CREATE_TABLE = new PropertyDescriptor.Builder() - .name("hive-create-table") - .displayName("Create Table Strategy") - .description("Specifies how to process the target table when it does not exist (create it, fail, e.g.).") - .required(true) - .addValidator(Validator.VALID) - .allowableValues(CREATE_IF_NOT_EXISTS, FAIL_IF_NOT_EXISTS) - .defaultValue(FAIL_IF_NOT_EXISTS.getValue()) - .build(); - - static final PropertyDescriptor TABLE_MANAGEMENT_STRATEGY = new PropertyDescriptor.Builder() - .name("hive-create-table-management") - .displayName("Create Table Management Strategy") - .description("Specifies (when a table is to be created) whether the table is a managed table or an external table. Note that when External is specified, the " - + "'External Table Location' property must be specified. If the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' value is selected, 'External Table Location' " - + "must still be specified, but can contain Expression Language or be set to the empty string, and is ignored when the attribute evaluates to 'Managed'.") - .required(true) - .addValidator(Validator.VALID) - .allowableValues(MANAGED_TABLE, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE) - .defaultValue(MANAGED_TABLE.getValue()) - .dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS) - .build(); - - static final PropertyDescriptor UPDATE_FIELD_NAMES = new PropertyDescriptor.Builder() - .name("hive-update-field-names") - .displayName("Update Field Names") - .description("This property indicates whether to update the output schema such that the field names are set to the exact column names from the specified " - + "table. This should be used if the incoming record field names may not match the table's column names in terms of upper- and lower-case. For example, this property should be " - + "set to true if the output FlowFile (and target table storage) is Avro format, as Hive/Impala expects the field names to match the column names exactly.") - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - static final PropertyDescriptor RECORD_WRITER_FACTORY = new PropertyDescriptor.Builder() - .name("hive-record-writer") - .displayName("Record Writer") - .description("Specifies the Controller Service to use for writing results to a FlowFile. The Record Writer should use Inherit Schema to emulate the inferred schema behavior, i.e. " - + "an explicit schema need not be defined in the writer, and will be supplied by the same logic used to infer the schema from the column types. If Create Table Strategy is set " - + "'Create If Not Exists', the Record Writer's output format must match the Record Reader's format in order for the data to be placed in the created table location. Note that " - + "this property is only used if 'Update Field Names' is set to true and the field names do not all match the column names exactly. If no " - + "update is needed for any field names (or 'Update Field Names' is false), the Record Writer is not used and instead the input FlowFile is routed to success or failure " - + "without modification.") - .identifiesControllerService(RecordSetWriterFactory.class) - .dependsOn(UPDATE_FIELD_NAMES, "true") - .required(true) - .build(); - - static final PropertyDescriptor EXTERNAL_TABLE_LOCATION = new PropertyDescriptor.Builder() - .name("hive-external-table-location") - .displayName("External Table Location") - .description("Specifies (when an external table is to be created) the file path (in HDFS, e.g.) to store table data.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR) - .dependsOn(TABLE_MANAGEMENT_STRATEGY, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE) - .build(); - - static final PropertyDescriptor TABLE_STORAGE_FORMAT = new PropertyDescriptor.Builder() - .name("hive-storage-format") - .displayName("Create Table Storage Format") - .description("If a table is to be created, the specified storage format will be used.") - .required(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .allowableValues(TEXTFILE_STORAGE, SEQUENCEFILE_STORAGE, ORC_STORAGE, PARQUET_STORAGE, AVRO_STORAGE, RCFILE_STORAGE) - .defaultValue(TEXTFILE) - .dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS) - .build(); - - static final PropertyDescriptor QUERY_TIMEOUT = new PropertyDescriptor.Builder() - .name("hive-query-timeout") - .displayName("Query Timeout") - .description("Sets the number of seconds the driver will wait for a query to execute. " - + "A value of 0 means no timeout. NOTE: Non-zero values may not be supported by the driver.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - static final PropertyDescriptor PARTITION_CLAUSE = new PropertyDescriptor.Builder() - .name("hive-partition-clause") - .displayName("Partition Clause") - .description("Specifies a comma-separated list of attribute names and optional data types corresponding to the partition columns of the target table. Simply put, if the table is " - + "partitioned or is to be created with partitions, each partition name should be an attribute on the FlowFile and listed in this property. This assumes all incoming records " - + "belong to the same partition and the partition columns are not fields in the record. An example of specifying this field is if PartitionRecord " - + "is upstream and two partition columns 'name' (of type string) and 'age' (of type integer) are used, then this property can be set to 'name string, age int'. The data types " - + "are optional and if partition(s) are to be created they will default to string type if not specified. For non-string primitive types, specifying the data type for existing " - + "partition columns is helpful for interpreting the partition value(s). If the table exists, the data types need not be specified " - + "(and are ignored in that case). This property must be set if the table is partitioned, and there must be an attribute for each partition column in the table. " - + "The values of the attributes will be used as the partition values, and the resulting output.path attribute value will reflect the location of the partition in the filesystem " - + "(for use downstream in processors such as PutHDFS).") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - // Relationships - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile containing records routed to this relationship after the record has been successfully transmitted to Hive.") - .build(); - - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile containing records routed to this relationship if the record could not be transmitted to Hive.") - .build(); - - private List propertyDescriptors; - private Set relationships; - - @Override - protected void init(ProcessorInitializationContext context) { - List props = new ArrayList<>(); - props.add(RECORD_READER); - props.add(HIVE_DBCP_SERVICE); - props.add(TABLE_NAME); - props.add(PARTITION_CLAUSE); - props.add(CREATE_TABLE); - props.add(TABLE_MANAGEMENT_STRATEGY); - props.add(EXTERNAL_TABLE_LOCATION); - props.add(TABLE_STORAGE_FORMAT); - props.add(UPDATE_FIELD_NAMES); - props.add(RECORD_WRITER_FACTORY); - props.add(QUERY_TIMEOUT); - - propertyDescriptors = Collections.unmodifiableList(props); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - @Override - protected Collection customValidate(ValidationContext validationContext) { - List validationResults = new ArrayList<>(super.customValidate(validationContext)); - final boolean recordWriterFactorySet = validationContext.getProperty(RECORD_WRITER_FACTORY).isSet(); - final boolean createIfNotExists = validationContext.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue()); - final boolean updateFieldNames = validationContext.getProperty(UPDATE_FIELD_NAMES).asBoolean(); - - if (!recordWriterFactorySet && updateFieldNames) { - validationResults.add(new ValidationResult.Builder().subject(RECORD_WRITER_FACTORY.getDisplayName()) - .explanation("Record Writer must be set if 'Update Field Names' is true").valid(false).build()); - } - final String tableManagementStrategy = validationContext.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue(); - final boolean managedTable; - if (!ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) { - managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy); - // Ensure valid configuration for external tables - if (createIfNotExists && !managedTable && !validationContext.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) { - validationResults.add(new ValidationResult.Builder().subject(EXTERNAL_TABLE_LOCATION.getDisplayName()) - .explanation("External Table Location must be set when Table Management Strategy is set to External").valid(false).build()); - } - } - return validationResults; - } - - @Override - public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { - - FlowFile flowFile = session.get(); - if (flowFile == null) { - return; - } - - final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class); - final RecordSetWriterFactory recordWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class); - final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue(); - final String partitionClauseString = context.getProperty(PARTITION_CLAUSE).evaluateAttributeExpressions(flowFile).getValue(); - List partitionClauseElements = null; - if (!StringUtils.isEmpty(partitionClauseString)) { - partitionClauseElements = Arrays.stream(partitionClauseString.split(",")).filter(Objects::nonNull).map(String::trim).collect(Collectors.toList()); - } - - final ComponentLog log = getLogger(); - - try { - final RecordReader reader; - - try (final InputStream in = session.read(flowFile)) { - // if we fail to create the RecordReader then we want to route to failure, so we need to - // handle this separately from the other IOExceptions which normally route to retry - try { - reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger()); - } catch (Exception e) { - throw new RecordReaderFactoryException("Unable to create RecordReader", e); - } - } catch (RecordReaderFactoryException rrfe) { - log.error( - "Failed to create {} for {} - routing to failure", - new Object[]{RecordReader.class.getSimpleName(), flowFile}, - rrfe - ); - // Since we are wrapping the exceptions above there should always be a cause - // but it's possible it might not have a message. This handles that by logging - // the name of the class thrown. - Throwable c = rrfe.getCause(); - if (c != null) { - session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown"); - } else { - session.putAttribute(flowFile, "record.error.message", rrfe.getClass().getCanonicalName() + " Thrown"); - } - session.transfer(flowFile, REL_FAILURE); - return; - } - - RecordSchema recordSchema = reader.getSchema(); - - final boolean createIfNotExists = context.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue()); - final boolean updateFieldNames = context.getProperty(UPDATE_FIELD_NAMES).asBoolean(); - if (recordWriterFactory == null && updateFieldNames) { - throw new ProcessException("Record Writer must be set if 'Update Field Names' is true"); - } - final String tableManagementStrategy = context.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue(); - final boolean managedTable; - if (ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) { - String tableManagementStrategyAttribute = flowFile.getAttribute(TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE); - if (MANAGED_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) { - managedTable = true; - } else if (EXTERNAL_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) { - managedTable = false; - } else { - log.error("The '{}' attribute either does not exist or has invalid value: {}. Must be one of (ignoring case): Managed, External. " - + "Routing flowfile to failure", - new Object[]{TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE, tableManagementStrategyAttribute}); - session.transfer(flowFile, REL_FAILURE); - return; - } - } else { - managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy); - } - - // Ensure valid configuration for external tables - if (createIfNotExists && !managedTable && !context.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) { - throw new IOException("External Table Location must be set when Table Management Strategy is set to External"); - } - final String externalTableLocation = managedTable ? null : context.getProperty(EXTERNAL_TABLE_LOCATION).evaluateAttributeExpressions(flowFile).getValue(); - if (!managedTable && StringUtils.isEmpty(externalTableLocation)) { - log.error("External Table Location has invalid value: {}. Routing flowfile to failure", new Object[]{externalTableLocation}); - session.transfer(flowFile, REL_FAILURE); - return; - } - final String storageFormat = context.getProperty(TABLE_STORAGE_FORMAT).getValue(); - final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class); - try (final Connection connection = dbcpService.getConnection()) { - final Map attributes = new HashMap<>(flowFile.getAttributes()); - OutputMetadataHolder outputMetadataHolder = checkAndUpdateTableSchema(attributes, connection, recordSchema, tableName, partitionClauseElements, - createIfNotExists, externalTableLocation, storageFormat, updateFieldNames); - if (outputMetadataHolder != null) { - // The output schema changed (i.e. field names were updated), so write out the corresponding FlowFile - try { - final FlowFile inputFlowFile = flowFile; - flowFile = session.write(flowFile, (in, out) -> { - - // if we fail to create the RecordReader then we want to route to failure, so we need to - // handle this separately from the other IOExceptions which normally route to retry - final RecordReader recordReader; - final RecordSetWriter recordSetWriter; - try { - recordReader = recordReaderFactory.createRecordReader(inputFlowFile, in, getLogger()); - recordSetWriter = recordWriterFactory.createWriter(getLogger(), outputMetadataHolder.getOutputSchema(), out, attributes); - } catch (Exception e) { - if(e instanceof IOException) { - throw (IOException) e; - } - throw new IOException(new RecordReaderFactoryException("Unable to create RecordReader", e)); - } - - WriteResult writeResult = updateRecords(recordSchema, outputMetadataHolder, recordReader, recordSetWriter); - recordSetWriter.flush(); - recordSetWriter.close(); - attributes.put("record.count", String.valueOf(writeResult.getRecordCount())); - attributes.put(CoreAttributes.MIME_TYPE.key(), recordSetWriter.getMimeType()); - attributes.putAll(writeResult.getAttributes()); - }); - } catch (final Exception e) { - getLogger().error("Failed to process {}; will route to failure", new Object[]{flowFile, e}); - // Since we are wrapping the exceptions above there should always be a cause - // but it's possible it might not have a message. This handles that by logging - // the name of the class thrown. - Throwable c = e.getCause(); - if (c != null) { - session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown"); - } else { - session.putAttribute(flowFile, "record.error.message", e.getClass().getCanonicalName() + " Thrown"); - } - session.transfer(flowFile, REL_FAILURE); - return; - } - - } - attributes.put(ATTR_OUTPUT_TABLE, tableName); - flowFile = session.putAllAttributes(flowFile, attributes); - session.getProvenanceReporter().invokeRemoteProcess(flowFile, dbcpService.getConnectionURL()); - session.transfer(flowFile, REL_SUCCESS); - } - } catch (IOException | SQLException e) { - - flowFile = session.putAttribute(flowFile, ATTR_OUTPUT_TABLE, tableName); - log.error("Exception while processing {} - routing to failure", new Object[]{flowFile}, e); - session.transfer(flowFile, REL_FAILURE); - - } catch (DiscontinuedException e) { - // The input FlowFile processing is discontinued. Keep it in the input queue. - getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e); - session.transfer(flowFile, Relationship.SELF); - } catch (Throwable t) { - throw (t instanceof ProcessException) ? (ProcessException) t : new ProcessException(t); - } - } - - private synchronized OutputMetadataHolder checkAndUpdateTableSchema(Map attributes, final Connection conn, final RecordSchema schema, - final String tableName, List partitionClause, final boolean createIfNotExists, - final String externalTableLocation, final String storageFormat, final boolean updateFieldNames) throws IOException { - // Read in the current table metadata, compare it to the reader's schema, and - // add any columns from the schema that are missing in the table - try (Statement s = conn.createStatement()) { - // Determine whether the table exists - ResultSet tables = s.executeQuery("SHOW TABLES"); - List tableNames = new ArrayList<>(); - String hiveTableName; - while (tables.next() && StringUtils.isNotEmpty(hiveTableName = tables.getString(1))) { - tableNames.add(hiveTableName); - } - - List columnsToAdd = new ArrayList<>(); - String outputPath; - boolean tableCreated = false; - if (!tableNames.contains(tableName) && createIfNotExists) { - StringBuilder createTableStatement = new StringBuilder(); - for (RecordField recordField : schema.getFields()) { - String recordFieldName = recordField.getFieldName(); - // The field does not exist in the table, add it - columnsToAdd.add("`" + recordFieldName + "` " + NiFiOrcUtils.getHiveTypeFromFieldType(recordField.getDataType(), true)); - getLogger().debug("Adding column " + recordFieldName + " to table " + tableName); - } - - // Handle partition clause - if (partitionClause == null) { - partitionClause = Collections.emptyList(); - } - List validatedPartitionClause = new ArrayList<>(partitionClause.size()); - for (String partition : partitionClause) { - String[] partitionInfo = partition.split(" "); - if (partitionInfo.length != 2) { - validatedPartitionClause.add("`" + partitionInfo[0] + "` string"); - } else { - validatedPartitionClause.add("`" + partitionInfo[0] + "` " + partitionInfo[1]); - } - } - - createTableStatement.append("CREATE ") - .append(externalTableLocation == null ? "" : "EXTERNAL ") - .append("TABLE IF NOT EXISTS `") - .append(tableName) - .append("` (") - .append(String.join(", ", columnsToAdd)) - .append(") ") - .append(validatedPartitionClause.isEmpty() ? "" : "PARTITIONED BY (" + String.join(", ", validatedPartitionClause) + ") ") - .append("STORED AS ") - .append(storageFormat) - .append(externalTableLocation == null ? "" : " LOCATION '" + externalTableLocation + "'"); - - String createTableSql = createTableStatement.toString(); - - if (StringUtils.isNotEmpty(createTableSql)) { - // Perform the table create - getLogger().info("Executing Hive DDL: " + createTableSql); - s.execute(createTableSql); - } - - tableCreated = true; - } - - // Process the table (columns, partitions, location, etc.) - List hiveColumns = new ArrayList<>(); - - String describeTable = "DESC FORMATTED `" + tableName + "`"; - ResultSet tableInfo = s.executeQuery(describeTable); - // Result is 3 columns, col_name, data_type, comment. Check the first row for a header and skip if so, otherwise add column name - tableInfo.next(); - String columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) { - hiveColumns.add(columnName); - } - // If the column was a header, check for a blank line to follow and skip it, otherwise add the column name - if (columnName.startsWith("#")) { - tableInfo.next(); - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName)) { - hiveColumns.add(columnName); - } - } - - // Collect all column names - while (tableInfo.next() && StringUtils.isNotEmpty(columnName = tableInfo.getString(1))) { - hiveColumns.add(columnName); - } - - // Collect all partition columns - boolean moreRows = true; - boolean headerFound = false; - while (moreRows && !headerFound) { - String line = tableInfo.getString(1); - if ("# Partition Information".equals(line)) { - headerFound = true; - } else if ("# Detailed Table Information".equals(line)) { - // Not partitioned, exit the loop with headerFound = false - break; - } - moreRows = tableInfo.next(); - } - - List partitionColumns = new ArrayList<>(); - List partitionColumnsEqualsValueList = new ArrayList<>(); - List partitionColumnsLocationList = new ArrayList<>(); - if (headerFound) { - // If the table is partitioned, construct the partition=value strings for each partition column - String partitionColumnName; - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) { - partitionColumns.add(columnName); - } - // If the column was a header, check for a blank line to follow and skip it, otherwise add the column name - if (columnName.startsWith("#")) { - tableInfo.next(); - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName)) { - partitionColumns.add(columnName); - } - } - while (tableInfo.next() && StringUtils.isNotEmpty(partitionColumnName = tableInfo.getString(1))) { - partitionColumns.add(partitionColumnName); - } - - final int partitionColumnsSize = partitionColumns.size(); - final int partitionClauseSize = (partitionClause == null) ? 0 : partitionClause.size(); - if (partitionClauseSize != partitionColumnsSize) { - throw new IOException("Found " + partitionColumnsSize + " partition columns but " + partitionClauseSize + " partition values were supplied"); - } - - for (int i = 0; i < partitionClauseSize; i++) { - String partitionName = partitionClause.get(i).split(" ")[0]; - String partitionValue = attributes.get(partitionName); - if (StringUtils.isEmpty(partitionValue)) { - throw new IOException("No value found for partition value attribute '" + partitionName + "'"); - } - if (!partitionColumns.contains(partitionName)) { - throw new IOException("Cannot add partition '" + partitionName + "' to existing table"); - } - partitionColumnsEqualsValueList.add("`" + partitionName + "`='" + partitionValue + "'"); - // Add unquoted version for the output path - partitionColumnsLocationList.add(partitionName + "=" + partitionValue); - } - } - - // Get table location - moreRows = true; - headerFound = false; - while (moreRows && !headerFound) { - String line = tableInfo.getString(1); - if (line.startsWith("Location:")) { - headerFound = true; - continue; // Don't do a next() here, need to get the second column value - } - moreRows = tableInfo.next(); - } - String tableLocation = tableInfo.getString(2); - - String alterTableSql; - // If the table wasn't newly created, alter it accordingly - if (!tableCreated) { - StringBuilder alterTableStatement = new StringBuilder(); - // Handle new columns - for (RecordField recordField : schema.getFields()) { - String recordFieldName = recordField.getFieldName().toLowerCase(); - if (!hiveColumns.contains(recordFieldName) && !partitionColumns.contains(recordFieldName)) { - // The field does not exist in the table (and is not a partition column), add it - columnsToAdd.add("`" + recordFieldName + "` " + NiFiOrcUtils.getHiveTypeFromFieldType(recordField.getDataType(), true)); - getLogger().info("Adding column " + recordFieldName + " to table " + tableName); - } - } - - if (!columnsToAdd.isEmpty()) { - alterTableStatement.append("ALTER TABLE `") - .append(tableName) - .append("` ADD COLUMNS (") - .append(String.join(", ", columnsToAdd)) - .append(")"); - - alterTableSql = alterTableStatement.toString(); - if (StringUtils.isNotEmpty(alterTableSql)) { - // Perform the table update - getLogger().info("Executing Hive DDL: " + alterTableSql); - s.execute(alterTableSql); - } - } - } - - outputPath = tableLocation; - - // Handle new partition values - if (!partitionColumnsEqualsValueList.isEmpty()) { - alterTableSql = "ALTER TABLE `" + - tableName + - "` ADD IF NOT EXISTS PARTITION (" + - String.join(", ", partitionColumnsEqualsValueList) + - ")"; - if (StringUtils.isNotEmpty(alterTableSql)) { - // Perform the table update - getLogger().info("Executing Hive DDL: " + alterTableSql); - s.execute(alterTableSql); - } - // Add attribute for HDFS location of the partition values - outputPath = tableLocation + "/" + String.join("/", partitionColumnsLocationList); - } - - // If updating field names, return a new RecordSchema, otherwise return null - OutputMetadataHolder outputMetadataHolder; - if (updateFieldNames) { - List inputRecordFields = schema.getFields(); - List outputRecordFields = new ArrayList<>(); - Map fieldMap = new HashMap<>(); - boolean needsUpdating = false; - - for (RecordField inputRecordField : inputRecordFields) { - final String inputRecordFieldName = inputRecordField.getFieldName(); - boolean found = false; - for (String hiveColumnName : hiveColumns) { - if (inputRecordFieldName.equalsIgnoreCase(hiveColumnName)) { - // Set a flag if the field name doesn't match the column name exactly. This overall flag will determine whether - // the records need updating (if true) or not (if false) - if (!inputRecordFieldName.equals(hiveColumnName)) { - needsUpdating = true; - } - fieldMap.put(inputRecordFieldName, hiveColumnName); - outputRecordFields.add(new RecordField(hiveColumnName, inputRecordField.getDataType(), inputRecordField.getDefaultValue(), inputRecordField.isNullable())); - found = true; - break; - } - } - if (!found) { - // If the input field wasn't a Hive table column, add it back to the schema as-is - fieldMap.put(inputRecordFieldName, inputRecordFieldName); - } - } - outputMetadataHolder = needsUpdating ? new OutputMetadataHolder(new SimpleRecordSchema(outputRecordFields), fieldMap) - : null; - } else { - outputMetadataHolder = null; - } - attributes.put(ATTR_OUTPUT_PATH, outputPath); - return outputMetadataHolder; - } catch (Exception e) { - throw new IOException(e); - } - } - - private synchronized WriteResult updateRecords(final RecordSchema inputRecordSchema, final OutputMetadataHolder outputMetadataHolder, - final RecordReader reader, final RecordSetWriter writer) throws IOException { - try { - writer.beginRecordSet(); - Record inputRecord; - while((inputRecord = reader.nextRecord()) != null) { - List inputRecordFields = inputRecordSchema.getFields(); - Map outputRecordFields = new HashMap<>(inputRecordFields.size()); - // Copy values from input field name to output field name - for(Map.Entry mapping : outputMetadataHolder.getFieldMap().entrySet()) { - outputRecordFields.put(mapping.getValue(), inputRecord.getValue(mapping.getKey())); - } - Record outputRecord = new MapRecord(outputMetadataHolder.getOutputSchema(), outputRecordFields); - writer.write(outputRecord); - } - return writer.finishRecordSet(); - - } catch (MalformedRecordException mre) { - throw new IOException("Error reading records: "+mre.getMessage(), mre); - } - } - - private static class OutputMetadataHolder { - private final RecordSchema outputSchema; - private final Map fieldMap; - - public OutputMetadataHolder(RecordSchema outputSchema, Map fieldMap) { - this.outputSchema = outputSchema; - this.fieldMap = fieldMap; - } - - public RecordSchema getOutputSchema() { - return outputSchema; - } - - public Map getFieldMap() { - return fieldMap; - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java deleted file mode 100644 index 70cc6c13c4..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -public class AuthenticationFailedException extends Exception { - public AuthenticationFailedException(String reason, Exception cause) { - super(reason, cause); - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java deleted file mode 100644 index 36889129f3..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -public class CsvOutputOptions { - - private boolean header = true; - private String altHeader = null; - private String delimiter = ","; - private boolean quote = false; - private boolean escape = true; - - private int maxRowsPerFlowFile = 0; - - public boolean isHeader() { - return header; - } - - public String getAltHeader() { - return altHeader; - } - - - public String getDelimiter() { - return delimiter; - } - - - public boolean isQuote() { - return quote; - } - - public boolean isEscape() { - return escape; - } - - public int getMaxRowsPerFlowFile() { - return maxRowsPerFlowFile; - } - - public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) { - this.header = header; - this.altHeader = altHeader; - this.delimiter = delimiter; - this.quote = quote; - this.escape = escape; - this.maxRowsPerFlowFile = maxRowsPerFlowFile; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java deleted file mode 100644 index b212207ebb..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.security.krb.KerberosUser; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; - -public class HiveConfigurator { - - public Collection validate(String configFiles, String principal, String keyTab, String password, - AtomicReference validationResourceHolder, ComponentLog log) { - - final List problems = new ArrayList<>(); - ValidationResources resources = validationResourceHolder.get(); - - // if no resources in the holder, or if the holder has different resources loaded, - // then load the Configuration and set the new resources in the holder - if (resources == null || !configFiles.equals(resources.getConfigResources())) { - log.debug("Reloading validation resources"); - resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles)); - validationResourceHolder.set(resources); - } - - final Configuration hiveConfig = resources.getConfiguration(); - - problems.addAll(KerberosProperties.validatePrincipalWithKeytabOrPassword(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, password, log)); - - return problems; - } - - public HiveConf getConfigurationFromFiles(final String configFiles) { - final HiveConf hiveConfig = new HiveConf(); - if (StringUtils.isNotBlank(configFiles)) { - for (final String configFile : configFiles.split(",")) { - hiveConfig.addResource(new Path(configFile.trim())); - } - } - return hiveConfig; - } - - public void preload(Configuration configuration) { - try { - FileSystem.get(configuration).close(); - UserGroupInformation.setConfiguration(configuration); - } catch (IOException ioe) { - // Suppress exception as future uses of this configuration will fail - } - } - - /** - * Acquires a {@link UserGroupInformation} using the given {@link Configuration} and {@link KerberosUser}. - * @see SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser) - * @param hiveConfig The Configuration to apply to the acquired UserGroupInformation - * @param kerberosUser The KerberosUser to authenticate - * @return A UserGroupInformation instance created using the Subject of the given KerberosUser - * @throws AuthenticationFailedException if authentication fails - */ - public UserGroupInformation authenticate(final Configuration hiveConfig, KerberosUser kerberosUser) throws AuthenticationFailedException { - try { - return SecurityUtil.getUgiForKerberosUser(hiveConfig, kerberosUser); - } catch (IOException ioe) { - throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe); - } - } - - /** - * As of Apache NiFi 1.5.0, due to changes made to - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this - * class to authenticate a principal with Kerberos, Hive controller services no longer - * attempt relogins explicitly. For more information, please read the documentation for - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. - *

- * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by - * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive - * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions - * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same - * {@link UserGroupInformation} instance. One of these threads could leave the - * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state - * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed - * authentication attempts that would leave the Hive controller service in an unrecoverable state. - * - * @see SecurityUtil#loginKerberos(Configuration, String, String) - * @deprecated Use {@link SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)} - */ - @Deprecated - public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException { - UserGroupInformation ugi; - try { - ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab); - } catch (IOException ioe) { - throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe); - } - return ugi; - } - - /** - * As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper - * method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a - * {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins. - * - * @see HiveConfigurator#authenticate(Configuration, String, String) - */ - @Deprecated - public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException { - return authenticate(hiveConfig, principal, keyTab); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java deleted file mode 100644 index fd1aaf4fad..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.SchemaBuilder.FieldAssembler; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumWriter; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringEscapeUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.nifi.components.PropertyDescriptor; - -import java.io.IOException; -import java.io.OutputStream; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static java.sql.Types.ARRAY; -import static java.sql.Types.BIGINT; -import static java.sql.Types.BINARY; -import static java.sql.Types.BIT; -import static java.sql.Types.BLOB; -import static java.sql.Types.BOOLEAN; -import static java.sql.Types.CHAR; -import static java.sql.Types.CLOB; -import static java.sql.Types.DATE; -import static java.sql.Types.DECIMAL; -import static java.sql.Types.DOUBLE; -import static java.sql.Types.FLOAT; -import static java.sql.Types.INTEGER; -import static java.sql.Types.JAVA_OBJECT; -import static java.sql.Types.LONGNVARCHAR; -import static java.sql.Types.LONGVARBINARY; -import static java.sql.Types.LONGVARCHAR; -import static java.sql.Types.NCHAR; -import static java.sql.Types.NUMERIC; -import static java.sql.Types.NVARCHAR; -import static java.sql.Types.OTHER; -import static java.sql.Types.REAL; -import static java.sql.Types.ROWID; -import static java.sql.Types.SMALLINT; -import static java.sql.Types.SQLXML; -import static java.sql.Types.STRUCT; -import static java.sql.Types.TIME; -import static java.sql.Types.TIMESTAMP; -import static java.sql.Types.TINYINT; -import static java.sql.Types.VARBINARY; -import static java.sql.Types.VARCHAR; - -/** - * JDBC / HiveQL common functions. - */ -public class HiveJdbcCommon { - - public static final String AVRO = "Avro"; - public static final String CSV = "CSV"; - - public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary"; - public static final String CSV_MIME_TYPE = "text/csv"; - - - public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder() - .name("hive-normalize-avro") - .displayName("Normalize Table/Column Names") - .description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods " - + "will be changed to underscores in order to build a valid Avro record.") - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException { - return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null); - } - - - public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback) - throws SQLException, IOException { - final Schema schema = createSchema(rs, recordName, convertNames); - final GenericRecord rec = new GenericData.Record(schema); - - final DatumWriter datumWriter = new GenericDatumWriter<>(schema); - try (final DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { - dataFileWriter.create(schema, outStream); - - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - long nrOfRows = 0; - while (rs.next()) { - if (callback != null) { - callback.processRow(rs); - } - for (int i = 1; i <= nrOfColumns; i++) { - final int javaSqlType = meta.getColumnType(i); - Object value = rs.getObject(i); - - if (value == null) { - rec.put(i - 1, null); - - } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) { - // bytes requires little bit different handling - ByteBuffer bb = null; - if (value instanceof byte[]) { - bb = ByteBuffer.wrap((byte[]) value); - } else if (value instanceof ByteBuffer) { - bb = (ByteBuffer) value; - } - if (bb != null) { - rec.put(i - 1, bb); - } else { - throw new IOException("Could not process binary object of type " + value.getClass().getName()); - } - - } else if (value instanceof Byte) { - // tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT - // But value is returned by JDBC as java.lang.Byte - // (at least H2 JDBC works this way) - // direct put to avro record results: - // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte - rec.put(i - 1, ((Byte) value).intValue()); - - } else if (value instanceof BigDecimal || value instanceof BigInteger) { - // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38" - rec.put(i - 1, value.toString()); - - } else if (value instanceof Number) { - // Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned - // when the JDBC type is 6 (Float) for example. - if (javaSqlType == FLOAT) { - value = rs.getFloat(i); - } else if (javaSqlType == DOUBLE) { - value = rs.getDouble(i); - } else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) { - value = rs.getInt(i); - } - - rec.put(i - 1, value); - - } else if (value instanceof Boolean) { - rec.put(i - 1, value); - } else if (value instanceof java.sql.SQLXML) { - rec.put(i - 1, ((java.sql.SQLXML) value).getString()); - } else { - // The different types that we support are numbers (int, long, double, float), - // as well as boolean values and Strings. Since Avro doesn't provide - // timestamp types, we want to convert those to Strings. So we will cast anything other - // than numbers or booleans to strings by using the toString() method. - rec.put(i - 1, value.toString()); - } - } - dataFileWriter.append(rec); - nrOfRows += 1; - - if (maxRows > 0 && nrOfRows == maxRows) - break; - } - - return nrOfRows; - } - } - - public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException { - return createSchema(rs, null, false); - } - - /** - * Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a - * fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value. - * - * @param rs The result set to convert to Avro - * @param recordName The a priori record name to use if it cannot be determined from the result set. - * @param convertNames Whether to convert column/table names to be legal Avro names - * @return A Schema object representing the result set converted to an Avro record - * @throws SQLException if any error occurs during conversion - */ - public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException { - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName; - try { - if (nrOfColumns > 0) { - // Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column - String firstColumnNameFromMeta = meta.getColumnName(1); - int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf("."); - if (tableNameDelimiter > -1) { - String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter); - if (!StringUtils.isBlank(tableNameFromMeta)) { - tableName = tableNameFromMeta; - } - } - } - } catch (SQLException se) { - // Not all drivers support getTableName, so just use the previously-set default - } - - if (convertNames) { - tableName = normalizeNameForAvro(tableName); - } - final FieldAssembler builder = SchemaBuilder.record(tableName).namespace("any.data").fields(); - - /** - * Some missing Avro types - Decimal, Date types. May need some additional work. - */ - for (int i = 1; i <= nrOfColumns; i++) { - String columnNameFromMeta = meta.getColumnName(i); - // Hive returns table.column for column name. Grab the column name as the string after the last period - int columnNameDelimiter = columnNameFromMeta.lastIndexOf("."); - String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1); - switch (meta.getColumnType(i)) { - case CHAR: - case LONGNVARCHAR: - case LONGVARCHAR: - case NCHAR: - case NVARCHAR: - case VARCHAR: - case ARRAY: - case STRUCT: - case JAVA_OBJECT: - case OTHER: - case SQLXML: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case BIT: - case BOOLEAN: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault(); - break; - - case INTEGER: - // Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned() - boolean signedType = true; - try { - signedType = meta.isSigned(i); - } catch (SQLException se) { - // Use signed types as default - } - if (signedType) { - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); - } else { - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); - } - break; - - case SMALLINT: - case TINYINT: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); - break; - - case BIGINT: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); - break; - - // java.sql.RowId is interface, is seems to be database - // implementation specific, let's convert to String - case ROWID: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case FLOAT: - case REAL: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault(); - break; - - case DOUBLE: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault(); - break; - - // Did not find direct suitable type, need to be clarified!!!! - case DECIMAL: - case NUMERIC: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - // Did not find direct suitable type, need to be clarified!!!! - case DATE: - case TIME: - case TIMESTAMP: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case BINARY: - case VARBINARY: - case LONGVARBINARY: - case BLOB: - case CLOB: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault(); - break; - - - default: - throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type"); - } - } - - return builder.endRecord(); - } - - public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException { - return convertToCsvStream(rs, outStream, null, null, outputOptions); - } - - public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions) - throws SQLException, IOException { - - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - List columnNames = new ArrayList<>(nrOfColumns); - - if (outputOptions.isHeader()) { - if (outputOptions.getAltHeader() == null) { - for (int i = 1; i <= nrOfColumns; i++) { - String columnNameFromMeta = meta.getColumnName(i); - // Hive returns table.column for column name. Grab the column name as the string after the last period - int columnNameDelimiter = columnNameFromMeta.lastIndexOf("."); - columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1)); - } - } else { - String[] altHeaderNames = outputOptions.getAltHeader().split(","); - columnNames = Arrays.asList(altHeaderNames); - } - } - - // Write column names as header row - outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8)); - if (outputOptions.isHeader()) { - outStream.write("\n".getBytes(StandardCharsets.UTF_8)); - } - - // Iterate over the rows - int maxRows = outputOptions.getMaxRowsPerFlowFile(); - long nrOfRows = 0; - while (rs.next()) { - if (callback != null) { - callback.processRow(rs); - } - List rowValues = new ArrayList<>(nrOfColumns); - for (int i = 1; i <= nrOfColumns; i++) { - final int javaSqlType = meta.getColumnType(i); - final Object value = rs.getObject(i); - - switch (javaSqlType) { - case CHAR: - case LONGNVARCHAR: - case LONGVARCHAR: - case NCHAR: - case NVARCHAR: - case VARCHAR: - String valueString = rs.getString(i); - if (valueString != null) { - // Removed extra quotes as those are a part of the escapeCsv when required. - StringBuilder sb = new StringBuilder(); - if (outputOptions.isQuote()) { - sb.append("\""); - if (outputOptions.isEscape()) { - sb.append(StringEscapeUtils.escapeCsv(valueString)); - } else { - sb.append(valueString); - } - sb.append("\""); - rowValues.add(sb.toString()); - } else { - if (outputOptions.isEscape()) { - rowValues.add(StringEscapeUtils.escapeCsv(valueString)); - } else { - rowValues.add(valueString); - } - } - } else { - rowValues.add(""); - } - break; - case ARRAY: - case STRUCT: - case JAVA_OBJECT: - String complexValueString = rs.getString(i); - if (complexValueString != null) { - rowValues.add(StringEscapeUtils.escapeCsv(complexValueString)); - } else { - rowValues.add(""); - } - break; - case SQLXML: - if (value != null) { - rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString())); - } else { - rowValues.add(""); - } - default: - if (value != null) { - rowValues.add(value.toString()); - } else { - rowValues.add(""); - } - } - } - // Write row values - outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8)); - outStream.write("\n".getBytes(StandardCharsets.UTF_8)); - nrOfRows++; - - if (maxRows > 0 && nrOfRows == maxRows) - break; - } - return nrOfRows; - } - - public static String normalizeNameForAvro(String inputName) { - String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_"); - if (Character.isDigit(normalizedName.charAt(0))) { - normalizedName = "_" + normalizedName; - } - return normalizedName; - } - - /** - * An interface for callback methods which allows processing of a row during the convertToXYZStream() processing. - * IMPORTANT: This method should only work on the row pointed at by the current ResultSet reference. - * Advancing the cursor (e.g.) can cause rows to be skipped during Avro transformation. - */ - public interface ResultSetRowCallback { - void processRow(ResultSet resultSet) throws IOException; - } - - public static Configuration getConfigurationFromFiles(final String configFiles) { - final Configuration hiveConfig = new HiveConf(); - if (StringUtils.isNotBlank(configFiles)) { - for (final String configFile : configFiles.split(",")) { - hiveConfig.addResource(new Path(configFile.trim())); - } - } - return hiveConfig; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java deleted file mode 100644 index ddc15673e5..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveOptions.java +++ /dev/null @@ -1,155 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.util.hive; - -import java.io.Serializable; - - -public class HiveOptions implements Serializable { - /** - * Half of the default Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS - */ - private static final int DEFAULT_TICK_TUPLE_INTERVAL_SECS = 15; - - protected String databaseName; - protected String tableName; - protected String metaStoreURI; - protected Integer txnsPerBatch = 100; - protected Integer maxOpenConnections = 10; - protected Integer batchSize = 15000; - protected Integer idleTimeout = 60000; - protected Integer callTimeout = 0; - protected Integer heartBeatInterval = 60; - protected Boolean autoCreatePartitions = true; - protected String kerberosPrincipal; - protected String kerberosKeytab; - protected Integer tickTupleInterval = DEFAULT_TICK_TUPLE_INTERVAL_SECS; - - public HiveOptions(String metaStoreURI, String databaseName, String tableName) { - this.metaStoreURI = metaStoreURI; - this.databaseName = databaseName; - this.tableName = tableName; - } - - public HiveOptions withTickTupleInterval(Integer tickInterval) { - this.tickTupleInterval = tickInterval; - return this; - } - - public HiveOptions withTxnsPerBatch(Integer txnsPerBatch) { - this.txnsPerBatch = txnsPerBatch; - return this; - } - - public HiveOptions withMaxOpenConnections(Integer maxOpenConnections) { - this.maxOpenConnections = maxOpenConnections; - return this; - } - - public HiveOptions withBatchSize(Integer batchSize) { - this.batchSize = batchSize; - return this; - } - - public HiveOptions withIdleTimeout(Integer idleTimeout) { - this.idleTimeout = idleTimeout; - return this; - } - - public HiveOptions withCallTimeout(Integer callTimeout) { - this.callTimeout = callTimeout; - return this; - } - - public HiveOptions withHeartBeatInterval(Integer heartBeatInterval) { - this.heartBeatInterval = heartBeatInterval; - return this; - } - - public HiveOptions withAutoCreatePartitions(Boolean autoCreatePartitions) { - this.autoCreatePartitions = autoCreatePartitions; - return this; - } - - public HiveOptions withKerberosKeytab(String kerberosKeytab) { - this.kerberosKeytab = kerberosKeytab; - return this; - } - - public HiveOptions withKerberosPrincipal(String kerberosPrincipal) { - this.kerberosPrincipal = kerberosPrincipal; - return this; - } - - public String getMetaStoreURI() { - return metaStoreURI; - } - - public String getDatabaseName() { - return databaseName; - } - - public String getTableName() { - return tableName; - } - - public String getQualifiedTableName() { - return databaseName + "." + tableName; - } - - public Integer getBatchSize() { - return batchSize; - } - - public Integer getCallTimeOut() { - return callTimeout; - } - - public Integer getHeartBeatInterval() { - return heartBeatInterval; - } - - public Integer getMaxOpenConnections() { - return maxOpenConnections; - } - - public Integer getIdleTimeout() { - return idleTimeout; - } - - public Integer getTxnsPerBatch() { - return txnsPerBatch; - } - - public Boolean getAutoCreatePartitions() { - return autoCreatePartitions; - } - - public String getKerberosPrincipal() { - return kerberosPrincipal; - } - - public String getKerberosKeytab() { - return kerberosKeytab; - } - - public Integer getTickTupleInterval() { - return tickTupleInterval; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java deleted file mode 100644 index c58010c03d..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.util.hive; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.hcatalog.streaming.ConnectionError; -import org.apache.hive.hcatalog.streaming.HiveEndPoint; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; - -public class HiveUtils { - private static final Logger LOG = LoggerFactory.getLogger(HiveUtils.class); - - public static HiveEndPoint makeEndPoint(List partitionVals, HiveOptions options) throws ConnectionError { - return new HiveEndPoint(options.getMetaStoreURI(), options.getDatabaseName(), options.getTableName(), partitionVals); - } - - public static HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options, HiveConf hiveConf) - throws HiveWriter.ConnectFailure, InterruptedException { - return new HiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(), - options.getCallTimeOut(), callTimeoutPool, ugi, hiveConf); - } - - public static void logAllHiveEndPoints(Map allWriters) { - for (Map.Entry entry : allWriters.entrySet()) { - LOG.info("cached writers {} ", entry.getValue()); - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveWriter.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveWriter.java deleted file mode 100644 index e5ab48ffb6..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/HiveWriter.java +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.util.hive; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.hcatalog.streaming.HiveEndPoint; -import org.apache.hive.hcatalog.streaming.RecordWriter; -import org.apache.hive.hcatalog.streaming.SerializationError; -import org.apache.hive.hcatalog.streaming.StreamingConnection; -import org.apache.hive.hcatalog.streaming.StreamingException; -import org.apache.hive.hcatalog.streaming.StreamingIOFailure; -import org.apache.hive.hcatalog.streaming.StrictJsonWriter; -import org.apache.hive.hcatalog.streaming.TransactionBatch; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - - -public class HiveWriter { - - private static final Logger LOG = LoggerFactory.getLogger(HiveWriter.class); - - private final HiveEndPoint endPoint; - private final StreamingConnection connection; - private final int txnsPerBatch; - private final RecordWriter recordWriter; - private final ExecutorService callTimeoutPool; - private final long callTimeout; - private final Object txnBatchLock = new Object(); - private final UserGroupInformation ugi; - private TransactionBatch txnBatch; - private long lastUsed; // time of last flush on this writer - protected boolean closed; // flag indicating HiveWriter was closed - private int totalRecords = 0; - - public HiveWriter(HiveEndPoint endPoint, int txnsPerBatch, boolean autoCreatePartitions, long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveConf hiveConf) - throws InterruptedException, ConnectFailure { - try { - this.ugi = ugi; - this.callTimeout = callTimeout; - this.callTimeoutPool = callTimeoutPool; - this.endPoint = endPoint; - this.connection = newConnection(endPoint, autoCreatePartitions, hiveConf, ugi); - this.txnsPerBatch = txnsPerBatch; - this.recordWriter = getRecordWriter(endPoint, ugi, hiveConf); - this.txnBatch = nextTxnBatch(recordWriter); - this.closed = false; - this.lastUsed = System.currentTimeMillis(); - } catch (InterruptedException | RuntimeException | ConnectFailure e) { - throw e; - } catch (Exception e) { - throw new ConnectFailure(endPoint, e); - } - } - - protected RecordWriter getRecordWriter(HiveEndPoint endPoint, UserGroupInformation ugi, HiveConf hiveConf) throws StreamingException, IOException, InterruptedException { - if (ugi == null) { - return new StrictJsonWriter(endPoint, hiveConf); - } else { - try { - return ugi.doAs((PrivilegedExceptionAction) () -> new StrictJsonWriter(endPoint, hiveConf)); - } catch (UndeclaredThrowableException e) { - Throwable cause = e.getCause(); - if (cause instanceof StreamingException) { - throw (StreamingException) cause; - } else { - throw e; - } - } - } - } - - @Override - public String toString() { - return "{ endPoint = " + endPoint + ", TransactionBatch = " + txnBatch + " }"; - } - - /** - * Write the record data to Hive - * - * @throws IOException if an error occurs during the write - * @throws InterruptedException if the write operation is interrupted - */ - public synchronized void write(final byte[] record) - throws WriteFailure, SerializationError, InterruptedException { - if (closed) { - throw new IllegalStateException("This hive streaming writer was closed " + - "and thus no longer able to write : " + endPoint); - } - // write the tuple - try { - LOG.debug("Writing event to {}", endPoint); - callWithTimeout(new CallRunner() { - @Override - public Void call() throws StreamingException, InterruptedException { - txnBatch.write(record); - totalRecords++; - return null; - } - }); - } catch (SerializationError se) { - throw new SerializationError(endPoint.toString() + " SerializationError", se); - } catch (StreamingException | TimeoutException e) { - throw new WriteFailure(endPoint, txnBatch.getCurrentTxnId(), e); - } - } - - /** - * Commits the current Txn if totalRecordsPerTransaction > 0 . - * If 'rollToNext' is true, will switch to next Txn in batch or to a - * new TxnBatch if current Txn batch is exhausted - */ - public void flush(boolean rollToNext) - throws CommitFailure, TxnBatchFailure, TxnFailure, InterruptedException { - // if there are no records do not call flush - if (totalRecords <= 0) return; - try { - synchronized (txnBatchLock) { - commitTxn(); - nextTxn(rollToNext); - totalRecords = 0; - lastUsed = System.currentTimeMillis(); - } - } catch (StreamingException e) { - throw new TxnFailure(txnBatch, e); - } - } - - /** Queues up a heartbeat request on the current and remaining txns using the - * heartbeatThdPool and returns immediately - */ - public void heartBeat() throws InterruptedException { - // 1) schedule the heartbeat on one thread in pool - synchronized (txnBatchLock) { - try { - callWithTimeout(new CallRunner() { - @Override - public Void call() throws Exception { - try { - LOG.info("Sending heartbeat on batch " + txnBatch); - txnBatch.heartbeat(); - } catch (StreamingException e) { - LOG.warn("Heartbeat error on batch " + txnBatch, e); - } - return null; - } - }); - } catch (InterruptedException e) { - throw e; - } catch (Exception e) { - LOG.warn("Unable to send heartbeat on Txn Batch " + txnBatch, e); - // Suppressing exceptions as we don't care for errors on heartbeats - } - } - } - - /** - * Returns totalRecords written so far in a transaction - * @returns totalRecords - */ - public int getTotalRecords() { - return totalRecords; - } - - /** - * Flush and Close current transactionBatch. - */ - public void flushAndClose() throws TxnBatchFailure, TxnFailure, CommitFailure, - IOException, InterruptedException { - flush(false); - close(); - } - - /** - * Close the Transaction Batch and connection - * @throws IOException if an error occurs during close - * @throws InterruptedException if the close operation is interrupted - */ - public void close() throws IOException, InterruptedException { - closeTxnBatch(); - closeConnection(); - closed = true; - } - - protected void closeConnection() throws InterruptedException { - LOG.info("Closing connection to end point : {}", endPoint); - try { - callWithTimeout(new CallRunner() { - @Override - public Void call() throws Exception { - connection.close(); // could block - return null; - } - }); - } catch (Exception e) { - LOG.warn("Error closing connection to EndPoint : " + endPoint, e); - // Suppressing exceptions as we don't care for errors on connection close - } - } - - protected void commitTxn() throws CommitFailure, InterruptedException { - LOG.debug("Committing Txn id {} to {}", txnBatch.getCurrentTxnId(), endPoint); - try { - callWithTimeout(new CallRunner() { - @Override - public Void call() throws Exception { - txnBatch.commit(); // could block - return null; - } - }); - } catch (StreamingException | TimeoutException e) { - throw new CommitFailure(endPoint, txnBatch.getCurrentTxnId(), e); - } - } - - protected StreamingConnection newConnection(HiveEndPoint endPoint, boolean autoCreatePartitions, HiveConf conf, UserGroupInformation ugi) throws InterruptedException, ConnectFailure { - try { - return callWithTimeout(() -> { - return endPoint.newConnection(autoCreatePartitions, conf, ugi); // could block - }); - } catch (StreamingException | TimeoutException e) { - throw new ConnectFailure(endPoint, e); - } - } - - protected TransactionBatch nextTxnBatch(final RecordWriter recordWriter) - throws InterruptedException, TxnBatchFailure { - LOG.debug("Fetching new Txn Batch for {}", endPoint); - TransactionBatch batch = null; - try { - batch = callWithTimeout(() -> { - return connection.fetchTransactionBatch(txnsPerBatch, recordWriter); // could block - }); - batch.beginNextTransaction(); - LOG.debug("Acquired {}. Switching to first txn", batch); - } catch (TimeoutException | StreamingException e) { - throw new TxnBatchFailure(endPoint, e); - } - return batch; - } - - protected void closeTxnBatch() throws InterruptedException { - try { - LOG.debug("Closing Txn Batch {}", txnBatch); - callWithTimeout(new CallRunner() { - @Override - public Void call() throws Exception { - if (txnBatch != null) { - txnBatch.close(); // could block - } - return null; - } - }); - } catch (InterruptedException e) { - throw e; - } catch (Exception e) { - LOG.warn("Error closing txn batch " + txnBatch, e); - } - } - - /** - * Aborts the current Txn and switches to next Txn. - * @throws StreamingException if could not get new Transaction Batch, or switch to next Txn - */ - public void abort() throws StreamingException, TxnBatchFailure, InterruptedException { - synchronized (txnBatchLock) { - abortTxn(); - nextTxn(true); // roll to next - } - } - - - /** - * Aborts current Txn in the txnBatch. - */ - protected void abortTxn() throws InterruptedException { - LOG.info("Aborting Txn id {} on End Point {}", txnBatch.getCurrentTxnId(), endPoint); - try { - callWithTimeout(new CallRunner() { - @Override - public Void call() throws StreamingException, InterruptedException { - txnBatch.abort(); // could block - return null; - } - }); - } catch (InterruptedException e) { - throw e; - } catch (TimeoutException e) { - LOG.warn("Timeout while aborting Txn " + txnBatch.getCurrentTxnId() + " on EndPoint: " + endPoint, e); - } catch (Exception e) { - LOG.warn("Error aborting Txn " + txnBatch.getCurrentTxnId() + " on EndPoint: " + endPoint, e); - // Suppressing exceptions as we don't care for errors on abort - } - } - - - /** - * if there are remainingTransactions in current txnBatch, begins nextTransactions - * otherwise creates new txnBatch. - * @param rollToNext Whether to roll to the next transaction batch - */ - protected void nextTxn(boolean rollToNext) throws StreamingException, InterruptedException, TxnBatchFailure { - if (txnBatch.remainingTransactions() == 0) { - closeTxnBatch(); - txnBatch = null; - if (rollToNext) { - txnBatch = nextTxnBatch(recordWriter); - } - } else if (rollToNext) { - LOG.debug("Switching to next Txn for {}", endPoint); - txnBatch.beginNextTransaction(); // does not block - } - } - - /** - * If the current thread has been interrupted, then throws an - * exception. - * @throws InterruptedException uf the current thread has been interrupted - */ - protected static void checkAndThrowInterruptedException() - throws InterruptedException { - if (Thread.currentThread().interrupted()) { - throw new InterruptedException("Timed out before Hive call was made. " - + "Your callTimeout might be set too low or Hive calls are " - + "taking too long."); - } - } - - /** - * Execute the callable on a separate thread and wait for the completion - * for the specified amount of time in milliseconds. In case of timeout - * cancel the callable and throw an IOException - */ - private T callWithTimeout(final CallRunner callRunner) - throws TimeoutException, StreamingException, InterruptedException { - Future future = callTimeoutPool.submit(() -> { - if (ugi == null) { - return callRunner.call(); - } - try { - return ugi.doAs((PrivilegedExceptionAction) () -> callRunner.call()); - } catch (UndeclaredThrowableException e) { - Throwable cause = e.getCause(); - // Unwrap exception so it is thrown the same way as without ugi - if (!(cause instanceof Exception)) { - throw e; - } - throw (Exception)cause; - } - }); - try { - if (callTimeout > 0) { - return future.get(callTimeout, TimeUnit.MILLISECONDS); - } else { - return future.get(); - } - } catch (TimeoutException eT) { - future.cancel(true); - throw eT; - } catch (ExecutionException e1) { - Throwable cause = e1.getCause(); - if (cause instanceof IOException) { - throw new StreamingIOFailure("I/O Failure", (IOException) cause); - } else if (cause instanceof StreamingException) { - throw (StreamingException) cause; - } else if (cause instanceof InterruptedException) { - throw (InterruptedException) cause; - } else if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } else if (cause instanceof TimeoutException) { - throw new StreamingException("Operation Timed Out.", (TimeoutException) cause); - } else { - throw new RuntimeException(e1); - } - } - } - - public long getLastUsed() { - return lastUsed; - } - - private byte[] generateRecord(List tuple) { - StringBuilder buf = new StringBuilder(); - for (String o : tuple) { - buf.append(o); - buf.append(","); - } - return buf.toString().getBytes(); - } - - /** - * Simple interface whose call method is called by - * {#callWithTimeout} in a new thread inside a - * {@linkplain java.security.PrivilegedExceptionAction#run()} call. - * @param the type of object returned from the call - */ - private interface CallRunner { - T call() throws Exception; - } - - public static class Failure extends Exception { - public Failure(String message, Throwable cause) { - super(message, cause); - } - } - - public static class WriteFailure extends Failure { - public WriteFailure(HiveEndPoint endPoint, Long currentTxnId, Throwable cause) { - super("Failed writing to : " + endPoint + ". TxnID : " + currentTxnId, cause); - } - } - - public static class CommitFailure extends Failure { - public CommitFailure(HiveEndPoint endPoint, Long txnID, Throwable cause) { - super("Commit of Txn " + txnID + " failed on EndPoint: " + endPoint, cause); - } - } - - public static class ConnectFailure extends Failure { - public ConnectFailure(HiveEndPoint ep, Throwable cause) { - super("Failed connecting to EndPoint " + ep, cause); - } - } - - public static class TxnBatchFailure extends Failure { - public TxnBatchFailure(HiveEndPoint ep, Throwable cause) { - super("Failed acquiring Transaction Batch from EndPoint: " + ep, cause); - } - } - - public static class TxnFailure extends Failure { - public TxnFailure(TransactionBatch txnBatch, Throwable cause) { - super("Failed switching to next Txn in TxnBatch " + txnBatch, cause); - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java deleted file mode 100644 index 1014efb3c2..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.hadoop.conf.Configuration; - -/** - * A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary) - */ -public class ValidationResources { - - private final String configResources; - private final Configuration configuration; - - public ValidationResources(String configResources, Configuration configuration) { - this.configResources = configResources; - this.configuration = configuration; - } - - public String getConfigResources() { - return configResources; - } - - public Configuration getConfiguration() { - return configuration; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService deleted file mode 100644 index 6a2936e326..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.nifi.dbcp.hive.HiveConnectionPool \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor deleted file mode 100644 index d86ee38fe7..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.nifi.processors.hive.ConvertAvroToORC -org.apache.nifi.processors.hive.SelectHiveQL -org.apache.nifi.processors.hive.PutHiveQL -org.apache.nifi.processors.hive.PutHiveStreaming -org.apache.nifi.processors.hive.UpdateHiveTable diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/dbcp/hive/HiveConnectionPoolTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/dbcp/hive/HiveConnectionPoolTest.java deleted file mode 100644 index 77c72a5e2b..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/dbcp/hive/HiveConnectionPoolTest.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.dbcp.hive; - -import org.apache.commons.dbcp2.BasicDataSource; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.registry.VariableDescriptor; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.MockConfigurationContext; -import org.apache.nifi.util.MockVariableRegistry; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.EnabledIfSystemProperty; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.ArgumentMatchers.isA; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class HiveConnectionPoolTest { - private UserGroupInformation userGroupInformation; - private HiveConnectionPool hiveConnectionPool; - private BasicDataSource basicDataSource; - private ComponentLog componentLog; - private KerberosProperties kerberosProperties; - private File krb5conf = new File("src/test/resources/krb5.conf"); - - @BeforeEach - public void setup() throws Exception { - // have to initialize this system property before anything else - System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath()); - System.setProperty("java.security.krb5.realm", "nifi.com"); - System.setProperty("java.security.krb5.kdc", "nifi.kdc"); - - userGroupInformation = mock(UserGroupInformation.class); - basicDataSource = mock(BasicDataSource.class); - componentLog = mock(ComponentLog.class); - kerberosProperties = mock(KerberosProperties.class); - - when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> { - try { - return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run(); - } catch (IOException | Error | RuntimeException | InterruptedException e) { - throw e; - } catch (Throwable e) { - throw new UndeclaredThrowableException(e); - } - }); - - when(kerberosProperties.getKerberosKeytab()).thenReturn(new PropertyDescriptor.Builder() - .name("Kerberos Principal") - .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build()); - - when(kerberosProperties.getKerberosPrincipal()).thenReturn(new PropertyDescriptor.Builder() - .name("Kerberos Keytab") - .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build()); - - initPool(); - } - - private void initPool() throws Exception { - hiveConnectionPool = new HiveConnectionPool(); - - Field ugiField = HiveConnectionPool.class.getDeclaredField("ugi"); - ugiField.setAccessible(true); - ugiField.set(hiveConnectionPool, userGroupInformation); - - Field dataSourceField = HiveConnectionPool.class.getDeclaredField("dataSource"); - dataSourceField.setAccessible(true); - dataSourceField.set(hiveConnectionPool, basicDataSource); - - Field componentLogField = AbstractControllerService.class.getDeclaredField("logger"); - componentLogField.setAccessible(true); - componentLogField.set(hiveConnectionPool, componentLog); - - Field kerberosPropertiesField = HiveConnectionPool.class.getDeclaredField("kerberosProperties"); - kerberosPropertiesField.setAccessible(true); - kerberosPropertiesField.set(hiveConnectionPool, kerberosProperties); - } - - @Test - public void testGetConnectionSqlException() throws SQLException { - SQLException sqlException = new SQLException("bad sql"); - when(basicDataSource.getConnection()).thenThrow(sqlException); - - ProcessException e = assertThrows(ProcessException.class, () -> hiveConnectionPool.getConnection()); - assertEquals(sqlException, e.getCause()); - } - - @Test - public void testExpressionLanguageSupport() throws Exception { - final String URL = "jdbc:hive2://localhost:10000/default"; - final String USER = "user"; - final String PASS = "pass"; - final int MAX_CONN = 7; - final String MAX_CONN_LIFETIME = "1 sec"; - final String MAX_WAIT = "10 sec"; // 10000 milliseconds - final String CONF = "/path/to/hive-site.xml"; - hiveConnectionPool = new HiveConnectionPool(); - - Map props = new HashMap() {{ - put(HiveConnectionPool.DATABASE_URL, "${url}"); - put(HiveConnectionPool.DB_USER, "${username}"); - put(HiveConnectionPool.DB_PASSWORD, "${password}"); - put(HiveConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}"); - put(HiveConnectionPool.MAX_CONN_LIFETIME, "${maxconnlifetime}"); - put(HiveConnectionPool.MAX_WAIT_TIME, "${maxwait}"); - put(HiveConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}"); - }}; - - MockVariableRegistry registry = new MockVariableRegistry(); - registry.setVariable(new VariableDescriptor("url"), URL); - registry.setVariable(new VariableDescriptor("username"), USER); - registry.setVariable(new VariableDescriptor("password"), PASS); - registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN)); - registry.setVariable(new VariableDescriptor("maxconnlifetime"), MAX_CONN_LIFETIME); - registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT); - registry.setVariable(new VariableDescriptor("hiveconf"), CONF); - - - MockConfigurationContext context = new MockConfigurationContext(props, null, registry); - hiveConnectionPool.onConfigured(context); - - Field dataSourceField = HiveConnectionPool.class.getDeclaredField("dataSource"); - dataSourceField.setAccessible(true); - basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool); - assertEquals(URL, basicDataSource.getUrl()); - assertEquals(USER, basicDataSource.getUsername()); - assertEquals(PASS, basicDataSource.getPassword()); - assertEquals(MAX_CONN, basicDataSource.getMaxTotal()); - assertEquals(1000L, basicDataSource.getMaxConnLifetimeMillis()); - assertEquals(10000L, basicDataSource.getMaxWaitMillis()); - assertEquals(URL, hiveConnectionPool.getConnectionURL()); - } - - @EnabledIfSystemProperty( - named = "nifi.test.unstable", - matches = "true", - disabledReason = "Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run") - @Test - public void testKerberosAuthException() { - final String URL = "jdbc:hive2://localhost:10000/default"; - final String conf = "src/test/resources/hive-site-security.xml"; - final String ktab = "src/test/resources/fake.keytab"; - final String kprinc = "bad@PRINCIPAL.COM"; - - KerberosProperties kerbProperties = new KerberosProperties(krb5conf); - - Map props = new HashMap() {{ - put(HiveConnectionPool.DATABASE_URL, "${url}"); - put(HiveConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}"); - put(kerbProperties.getKerberosKeytab(), "${ktab}"); - put(kerbProperties.getKerberosPrincipal(), "${kprinc}"); - }}; - - MockVariableRegistry registry = new MockVariableRegistry(); - registry.setVariable(new VariableDescriptor("url"), URL); - registry.setVariable(new VariableDescriptor("conf"), conf); - registry.setVariable(new VariableDescriptor("ktab"), ktab); - registry.setVariable(new VariableDescriptor("kprinc"), kprinc); - - MockConfigurationContext context = new MockConfigurationContext(props, null, registry); - assertThrows(InitializationException.class, () -> hiveConnectionPool.onConfigured(context)); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestConvertAvroToORC.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestConvertAvroToORC.java deleted file mode 100644 index da5f18bd2d..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestConvertAvroToORC.java +++ /dev/null @@ -1,568 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils; -import org.apache.hadoop.hive.ql.io.orc.OrcFile; -import org.apache.hadoop.hive.ql.io.orc.OrcStruct; -import org.apache.hadoop.hive.ql.io.orc.Reader; -import org.apache.hadoop.hive.ql.io.orc.RecordReader; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.apache.nifi.util.orc.TestNiFiOrcUtils; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.InputStream; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - - -/** - * Unit tests for ConvertAvroToORC processor - */ -public class TestConvertAvroToORC { - - private ConvertAvroToORC processor; - private TestRunner runner; - - @BeforeEach - public void setUp() throws Exception { - processor = new ConvertAvroToORC(); - runner = TestRunners.newTestRunner(processor); - } - - @Test - public void test_onTrigger_routing_to_failure_null_type() throws Exception { - String testString = "Hello World"; - GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test.avro"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC", - resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - } - - @Test - public void test_onTrigger_routing_to_failure_empty_array_type() throws Exception { - String testString = "Hello World"; - GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithEmptyArray(testString); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test.avro"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, emptyArray ARRAY) STORED AS ORC", - resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - } - - @Test - public void test_onTrigger_routing_to_failure_fixed_type() throws Exception { - String testString = "Hello!"; - GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithFixed(testString); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test.avro"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_FAILURE, 1); - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_FAILURE).get(0); - assertEquals("test.avro", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - - final InputStream in = new ByteArrayInputStream(resultFlowFile.toByteArray()); - final DatumReader datumReader = new GenericDatumReader<>(); - try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { - assertTrue(dataFileReader.hasNext()); - GenericRecord testedRecord = dataFileReader.next(); - - assertNotNull(testedRecord.get("fixed")); - assertArrayEquals(testString.getBytes(StandardCharsets.UTF_8), ((GenericData.Fixed) testedRecord.get("fixed")).bytes()); - } - } - - @Test - public void test_onTrigger_primitive_record() throws Exception { - GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World"); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - // Put another record in - record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record"); - fileWriter.append(record); - // And one more - record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!"); - fileWriter.append(record); - fileWriter.flush(); - fileWriter.close(); - out.close(); - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test.avro"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - - // Write the flow file out to disk, since the ORC Reader needs a path - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)" - + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); - FileOutputStream fos = new FileOutputStream("target/test1.orc"); - fos.write(resultContents); - fos.flush(); - fos.close(); - - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - Object o = rows.next(null); - assertNotNull(o); - assertTrue(o instanceof OrcStruct); - TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema(); - StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); - - // Check some fields in the first row - Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int")); - assertTrue(intFieldObject instanceof IntWritable); - assertEquals(10, ((IntWritable) intFieldObject).get()); - Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string")); - assertTrue(stringFieldObject instanceof Text); - assertEquals("World", stringFieldObject.toString()); - - } - - @Test - public void test_onTrigger_complex_record() throws Exception { - - Map mapData1 = new TreeMap() {{ - put("key1", 1.0); - put("key2", 2.0); - }}; - - BigDecimal sampleBigDecimal = new BigDecimal("12.34"); - ByteBuffer bigDecimalAsBytes = ByteBuffer.wrap(sampleBigDecimal.unscaledValue().toByteArray()); - GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20), bigDecimalAsBytes); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - - // Put another record in - Map mapData2 = new TreeMap() {{ - put("key1", 3.0); - put("key2", 4.0); - }}; - - record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200), bigDecimalAsBytes); - fileWriter.append(record); - - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - - // Write the flow file out to disk, since the ORC Reader needs a path - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " + - "(myInt INT, myMap MAP, myEnum STRING, myLongOrFloat UNIONTYPE, myIntList ARRAY, myDecimal DECIMAL(10,2))" - + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); - FileOutputStream fos = new FileOutputStream("target/test1.orc"); - fos.write(resultContents); - fos.flush(); - fos.close(); - - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - Object o = rows.next(null); - assertNotNull(o); - assertTrue(o instanceof OrcStruct); - TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema(); - StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); - - // Check some fields in the first row - Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt")); - assertTrue(intFieldObject instanceof IntWritable); - assertEquals(10, ((IntWritable) intFieldObject).get()); - - Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap")); - assertTrue(mapFieldObject instanceof Map); - Map map = (Map) mapFieldObject; - Object mapValue = map.get(new Text("key1")); - assertNotNull(mapValue); - assertTrue(mapValue instanceof DoubleWritable); - assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); - - mapValue = map.get(new Text("key2")); - assertNotNull(mapValue); - assertTrue(mapValue instanceof DoubleWritable); - assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE); - - Object decimalFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myDecimal")); - assertTrue(decimalFieldObject instanceof HiveDecimalWritable); - assertEquals(sampleBigDecimal, ((HiveDecimalWritable) decimalFieldObject).getHiveDecimal().bigDecimalValue()); - } - - @Test - public void test_onTrigger_complex_records_with_bigdecimals() throws Exception { - - Map mapData1 = new TreeMap() {{ - put("key1", 1.0); - put("key2", 2.0); - }}; - - - BigDecimal sampleBigDecimal1 = new BigDecimal("3500.12"); - BigDecimal sampleBigDecimal2 = new BigDecimal("0.01"); - - GenericData.Record record1 = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData1, "XYZ", 4L, Arrays.asList(100, 200), toByteBuffer(sampleBigDecimal1)); - DatumWriter writer = new GenericDatumWriter<>(record1.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record1.getSchema(), out); - fileWriter.append(record1); - fileWriter.append(TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData1, "XYZ", 4L, Arrays.asList(100, 200), toByteBuffer(sampleBigDecimal2))); - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - - // Write the flow file out to disk, since the ORC Reader needs a path - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); - FileOutputStream fos = new FileOutputStream("target/test1.orc"); - fos.write(resultContents); - fos.flush(); - fos.close(); - - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema(); - StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); - - Object result1 = rows.next(null); - assertNotNull(result1); - Object decimalFieldObject1 = inspector.getStructFieldData(result1, inspector.getStructFieldRef("myDecimal")); - assertEquals(sampleBigDecimal1, ((HiveDecimalWritable) decimalFieldObject1).getHiveDecimal().bigDecimalValue()); - - Object result2 = rows.next(null); - assertNotNull(result2); - Object decimalFieldObject2 = inspector.getStructFieldData(result2, inspector.getStructFieldRef("myDecimal")); - assertEquals(sampleBigDecimal2, ((HiveDecimalWritable) decimalFieldObject2).getHiveDecimal().bigDecimalValue()); - } - - private ByteBuffer toByteBuffer(BigDecimal sampleBigDecimal) { - return ByteBuffer.wrap(sampleBigDecimal.unscaledValue().toByteArray()); - } - - @Test - public void test_onTrigger_array_of_records() throws Exception { - final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc")); - List innerRecords = new LinkedList<>(); - - final GenericRecord outerRecord = new GenericData.Record(schema); - - Schema arraySchema = schema.getField("records").schema(); - Schema innerRecordSchema = arraySchema.getElementType(); - final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema); - innerRecord1.put("name", "Joe"); - innerRecord1.put("age", 42); - - innerRecords.add(innerRecord1); - - final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema); - innerRecord2.put("name", "Mary"); - innerRecord2.put("age", 28); - - innerRecords.add(innerRecord2); - - GenericData.Array array = new GenericData.Array<>(arraySchema, innerRecords); - outerRecord.put("records", array); - - final DatumWriter datumWriter = new GenericDatumWriter<>(schema); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { - dataFileWriter.create(schema, out); - dataFileWriter.append(outerRecord); - } - out.close(); - - // Build a flow file from the Avro record - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - - // Write the flow file out to disk, since the ORC Reader needs a path - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " + - "(records ARRAY>)" - + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); - FileOutputStream fos = new FileOutputStream("target/test1.orc"); - fos.write(resultContents); - fos.flush(); - fos.close(); - - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - Object o = rows.next(null); - assertNotNull(o); - assertTrue(o instanceof OrcStruct); - StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema)); - - // Verify the record contains an array - Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records")); - assertTrue(arrayFieldObject instanceof ArrayList); - ArrayList arrayField = (ArrayList) arrayFieldObject; - assertEquals(2, arrayField.size()); - - // Verify the first element. Should be a record with two fields "name" and "age" - Object element = arrayField.get(0); - assertTrue(element instanceof OrcStruct); - StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema)); - Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); - assertTrue(nameObject instanceof Text); - assertEquals("Joe", nameObject.toString()); - Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); - assertTrue(ageObject instanceof IntWritable); - assertEquals(42, ((IntWritable) ageObject).get()); - - // Verify the first element. Should be a record with two fields "name" and "age" - element = arrayField.get(1); - assertTrue(element instanceof OrcStruct); - nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name")); - assertTrue(nameObject instanceof Text); - assertEquals("Mary", nameObject.toString()); - ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age")); - assertTrue(ageObject instanceof IntWritable); - assertEquals(28, ((IntWritable) ageObject).get()); - } - - @Test - public void test_onTrigger_nested_complex_record() throws Exception { - - Map> mapData1 = new TreeMap>() {{ - put("key1", Arrays.asList(1.0, 2.0)); - put("key2", Arrays.asList(3.0, 4.0)); - }}; - - Map arrayMap11 = new TreeMap() {{ - put("key1", "v1"); - put("key2", "v2"); - }}; - Map arrayMap12 = new TreeMap() {{ - put("key3", "v3"); - put("key4", "v4"); - }}; - - GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12)); - - DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); - DataFileWriter fileWriter = new DataFileWriter<>(writer); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - fileWriter.create(record.getSchema(), out); - fileWriter.append(record); - - // Put another record in - Map> mapData2 = new TreeMap>() {{ - put("key1", Arrays.asList(-1.0, -2.0)); - put("key2", Arrays.asList(-3.0, -4.0)); - }}; - - Map arrayMap21 = new TreeMap() {{ - put("key1", "v-1"); - put("key2", "v-2"); - }}; - Map arrayMap22 = new TreeMap() {{ - put("key3", "v-3"); - put("key4", "v-4"); - }}; - - record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22)); - fileWriter.append(record); - - fileWriter.flush(); - fileWriter.close(); - out.close(); - - Map attributes = new HashMap() {{ - put(CoreAttributes.FILENAME.key(), "test"); - }}; - runner.enqueue(out.toByteArray(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1); - - // Write the flow file out to disk, since the ORC Reader needs a path - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " + - "(myMapOfArray MAP>, myArrayOfMap ARRAY>)" - + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE)); - assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE)); - assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key())); - byte[] resultContents = runner.getContentAsByteArray(resultFlowFile); - FileOutputStream fos = new FileOutputStream("target/test1.orc"); - fos.write(resultContents); - fos.flush(); - fos.close(); - - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.getLocal(conf); - Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs)); - RecordReader rows = reader.rows(); - Object o = rows.next(null); - assertNotNull(o); - assertTrue(o instanceof OrcStruct); - TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema(); - StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema); - - - // check values - Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray")); - assertTrue(myMapOfArray instanceof Map); - Map map = (Map) myMapOfArray; - Object mapValue = map.get(new Text("key1")); - assertNotNull(mapValue); - assertTrue(mapValue instanceof List); - assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue); - - Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap")); - assertTrue(myArrayOfMap instanceof List); - List list = (List) myArrayOfMap; - Object el0 = list.get(0); - assertNotNull(el0); - assertTrue(el0 instanceof Map); - assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1"))); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java deleted file mode 100644 index 6ffb16b74e..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.util.MockProcessContext; -import org.apache.nifi.util.MockProcessorInitializationContext; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestHiveParser extends AbstractHiveQLProcessor { - - @BeforeEach - public void initialize() { - final MockProcessContext processContext = new MockProcessContext(this); - final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext); - initialize(initializationContext); - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - - } - - @Test - public void parseSelect() { - String query = "select a.empid, to_something(b.saraly) from " + - "company.emp a inner join default.salary b where a.empid = b.empid"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(2, tableNames.size()); - assertTrue(tableNames.contains(new TableName("company", "emp", true))); - assertTrue(tableNames.contains(new TableName("default", "salary", true))); - } - - @Test - public void parseSelectPrepared() { - String query = "select empid from company.emp a where a.firstName = ?"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName("company", "emp", true))); - } - - - @Test - public void parseLongSelect() { - String query = "select\n" + - "\n" + - " i_item_id,\n" + - "\n" + - " i_item_desc,\n" + - "\n" + - " s_state,\n" + - "\n" + - " count(ss_quantity) as store_sales_quantitycount,\n" + - "\n" + - " avg(ss_quantity) as store_sales_quantityave,\n" + - "\n" + - " stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" + - "\n" + - " stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" + - "\n" + - " count(sr_return_quantity) as store_returns_quantitycount,\n" + - "\n" + - " avg(sr_return_quantity) as store_returns_quantityave,\n" + - "\n" + - " stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" + - "\n" + - " stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" + - "\n" + - " count(cs_quantity) as catalog_sales_quantitycount,\n" + - "\n" + - " avg(cs_quantity) as catalog_sales_quantityave,\n" + - "\n" + - " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" + - "\n" + - " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" + - "\n" + - "from\n" + - "\n" + - " store_sales,\n" + - "\n" + - " store_returns,\n" + - "\n" + - " catalog_sales,\n" + - "\n" + - " date_dim d1,\n" + - "\n" + - " date_dim d2,\n" + - "\n" + - " date_dim d3,\n" + - "\n" + - " store,\n" + - "\n" + - " item\n" + - "\n" + - "where\n" + - "\n" + - " d1.d_quarter_name = '2000Q1'\n" + - "\n" + - " and d1.d_date_sk = ss_sold_date_sk\n" + - "\n" + - " and i_item_sk = ss_item_sk\n" + - "\n" + - " and s_store_sk = ss_store_sk\n" + - "\n" + - " and ss_customer_sk = sr_customer_sk\n" + - "\n" + - " and ss_item_sk = sr_item_sk\n" + - "\n" + - " and ss_ticket_number = sr_ticket_number\n" + - "\n" + - " and sr_returned_date_sk = d2.d_date_sk\n" + - "\n" + - " and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + - "\n" + - " and sr_customer_sk = cs_bill_customer_sk\n" + - "\n" + - " and sr_item_sk = cs_item_sk\n" + - "\n" + - " and cs_sold_date_sk = d3.d_date_sk\n" + - "\n" + - " and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + - "\n" + - "group by i_item_id , i_item_desc , s_state\n" + - "\n" + - "order by i_item_id , i_item_desc , s_state\n" + - "\n" + - "limit 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(6, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - for (TableName tableName : tableNames) { - if (tableName.equals(new TableName(null, "store_sales", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "store_returns", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "catalog_sales", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "date_dim", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "store", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "item", true))) { - cnt.incrementAndGet(); - } - } - assertEquals(6, cnt.get()); - } - - @Test - public void parseSelectInsert() { - String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'"; - - // The same database.tableName can appear two times for input and output. - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(2, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - tableNames.forEach(tableName -> { - if (tableName.equals(new TableName("databaseA", "tableA", false))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName("databaseA", "tableA", true))) { - cnt.incrementAndGet(); - } - }); - assertEquals(2, cnt.get()); - } - - @Test - public void parseInsert() { - String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(3, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - tableNames.forEach(tableName -> { - if (tableName.equals(new TableName("databaseB", "tableB1", false))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "tableA1", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "tableA2", true))) { - cnt.incrementAndGet(); - } - }); - assertEquals(3, cnt.get()); - } - - @Test - public void parseUpdate() { - String query = "update table_a set y = 'updated' where x > 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseDelete() { - String query = "delete from table_a where x > 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseDDL() { - String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" + - "EmployeeID INT,FirstName STRING, Title STRING,\n" + - "State STRING, Laptop STRING)\n" + - "COMMENT 'Employee Names'\n" + - "STORED AS ORC"; - - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false))); - } - - @Test - public void parseSetProperty() { - String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseSetRole() { - String query = "set role all"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseShowRoles() { - String query = "show roles"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseMsck() { - String query = "msck repair table table_a"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseAddJar() { - String query = "ADD JAR hdfs:///tmp/my_jar.jar"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveQL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveQL.java deleted file mode 100644 index e9eb29f487..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveQL.java +++ /dev/null @@ -1,846 +0,0 @@ -package org.apache.nifi.processors.hive;/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.pattern.RollbackOnFailure; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.Mockito; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.HashMap; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -@DisabledOnOs(OS.WINDOWS) -public class TestPutHiveQL { - private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)"; - private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))"; - - @BeforeAll - public static void setup() { - System.setProperty("derby.stream.error.file", "target/derby.log"); - } - - @Test - public void testDirectStatements(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes()); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - - runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes()); - runner.run(); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("George", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testFailInMiddleWithBadStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes()); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - runner.run(); - - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1); - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3); - runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS) - .forEach(f -> f.assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS")); - } - - @Test - public void testFailInMiddleWithBadStatementRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes()); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - runner.run(); - - // The 1st one should be routed to success, others should stay in queue. - assertEquals(3, runner.getQueueSize().getObjectCount()); - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 1); - } - - @Test - public void testFailAtBeginning(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - runner.run(); - - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1); - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 2); - } - - @Test - public void testFailAtBeginningRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(3, runner.getQueueSize().getObjectCount()); - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 0); - } - - @Test - public void testFailInMiddleWithBadParameterType(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); - badAttributes.put("hiveql.args.1.value", "hello"); - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1); - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3); - } - - - @Test - public void testFailInMiddleWithBadParameterValue(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100 - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3); - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertTrue(rs.next()); - assertTrue(rs.next()); - assertFalse(rs.next()); - } - } - } - - @Test - public void testFailInMiddleWithBadNumberFormat(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - badAttributes.put("hiveql.args.1.value", "NOT_NUMBER"); - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3); - runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertTrue(rs.next()); - assertTrue(rs.next()); - assertFalse(rs.next()); - } - } - } - - - @Test - public void testUsingSqlDataTypesWithNegativeValues(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)"); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", "-5"); - attributes.put("hiveql.args.1.value", "84"); - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS).get(0).assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testStatementsWithPreparedParameters(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - - runner.clearTransferState(); - - attributes.clear(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.1.value", "George"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.2.value", "1"); - - runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes); - runner.run(); - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("George", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - - @Test - public void testMultipleStatementsWithinFlowFile(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the semicolon - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS) - .forEach(f -> f.assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS")); - - // Now we can check that the values were inserted by the multi-statement script. - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals( 1, rs.getInt(1), "Record ID mismatch"); - assertEquals("George", rs.getString(2), "Record NAME mismatch"); - } - } - } - - @Test - public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the semicolon - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - - // Now we can check that the values were inserted by the multi-statement script. - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1), "Record ID mismatch"); - assertEquals( "George\\;", rs.getString(2), "Record NAME mismatch"); - } - } - } - - - @Test - public void testWithNullParameter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(0, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testInvalidStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the table is invalid - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - } - } - } - - - @Test - public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final DBCPService service = new SQLExceptionService(null); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because there isn't a valid connection and tables don't exist. - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 1); - } - - @Test - public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final DBCPService service = new SQLExceptionService(null); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(1, runner.getQueueSize().getObjectCount()); - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 0); - } - - @Test - public void testUnknownFailure() throws InitializationException, ProcessException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final SQLExceptionService service = new SQLExceptionService(null); - service.setErrorCode(2); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because there isn't a valid connection and tables don't exist. - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 1); - } - - @Test - public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException { - final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class); - final SQLExceptionService service = new SQLExceptionService(null); - service.setErrorCode(0); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(1, runner.getQueueSize().getObjectCount()); - runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 0); - } - - /** - * Simple implementation only for testing purposes - */ - private static class MockDBCPService extends AbstractControllerService implements HiveDBCPService { - private final String dbLocation; - - MockDBCPService(final String dbLocation) { - this.dbLocation = dbLocation; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); - return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true"); - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:derby:" + dbLocation + ";create=true"; - } - } - - /** - * Simple implementation only for testing purposes - */ - private static class SQLExceptionService extends AbstractControllerService implements HiveDBCPService { - private final HiveDBCPService service; - private int allowedBeforeFailure = 0; - private int successful = 0; - private int errorCode = 30000; // Default to a retryable exception code - - SQLExceptionService(final HiveDBCPService service) { - this.service = service; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - if (++successful > allowedBeforeFailure) { - final Connection conn = Mockito.mock(Connection.class); - Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode)); - return conn; - } else { - return service.getConnection(); - } - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return service != null ? service.getConnectionURL() : null; - } - - void setErrorCode(int errorCode) { - this.errorCode = errorCode; - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveStreaming.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveStreaming.java deleted file mode 100644 index 3bb30f2b4e..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHiveStreaming.java +++ /dev/null @@ -1,1162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileStream; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumWriter; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.hcatalog.streaming.HiveEndPoint; -import org.apache.hive.hcatalog.streaming.RecordWriter; -import org.apache.hive.hcatalog.streaming.SerializationError; -import org.apache.hive.hcatalog.streaming.StreamingConnection; -import org.apache.hive.hcatalog.streaming.StreamingException; -import org.apache.hive.hcatalog.streaming.TransactionBatch; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.kerberos.KerberosContext; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.security.krb.KerberosPasswordUser; -import org.apache.nifi.security.krb.KerberosUser; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.apache.nifi.util.hive.AuthenticationFailedException; -import org.apache.nifi.util.hive.HiveConfigurator; -import org.apache.nifi.util.hive.HiveOptions; -import org.apache.nifi.util.hive.HiveWriter; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.AdditionalMatchers; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; - -import static org.apache.nifi.processors.hive.AbstractHiveQLProcessor.ATTR_OUTPUT_TABLES; -import static org.apache.nifi.processors.hive.PutHiveStreaming.HIVE_STREAMING_RECORD_COUNT_ATTR; -import static org.apache.nifi.processors.hive.PutHiveStreaming.REL_SUCCESS; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.ArgumentMatchers.isNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -/** - * Unit tests for PutHiveStreaming processor. - */ -public class TestPutHiveStreaming { - - private TestRunner runner; - private MockPutHiveStreaming processor; - - private KerberosProperties kerberosPropsWithFile; - private HiveConfigurator hiveConfigurator; - private HiveConf hiveConf; - private UserGroupInformation ugi; - - @BeforeEach - public void setUp() { - - // needed for calls to UserGroupInformation.setConfiguration() to work when passing in - // config with Kerberos authentication enabled - System.setProperty("java.security.krb5.realm", "nifi.com"); - System.setProperty("java.security.krb5.kdc", "nifi.kdc"); - - ugi = null; - kerberosPropsWithFile = new KerberosProperties(new File("src/test/resources/krb5.conf")); - - processor = new MockPutHiveStreaming(); - hiveConfigurator = mock(HiveConfigurator.class); - hiveConf = mock(HiveConf.class); - when(hiveConfigurator.getConfigurationFromFiles(AdditionalMatchers.or(anyString(), isNull()))).thenReturn(hiveConf); - processor.hiveConfigurator = hiveConfigurator; - processor.setKerberosProperties(kerberosPropsWithFile); - KerberosContext mockKerberosContext = mock(KerberosContext.class); - when(mockKerberosContext.getKerberosConfigurationFile()).thenReturn(kerberosPropsWithFile.getKerberosConfigFile()); - when(mockKerberosContext.getKerberosServiceKeytab()).thenReturn(null); - when(mockKerberosContext.getKerberosServicePrincipal()).thenReturn(null); - runner = TestRunners.newTestRunner(processor, mockKerberosContext); - } - - @Test - public void testSetup() { - runner.assertNotValid(); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.assertNotValid(); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.assertValid(); - runner.run(); - } - - @Test - public void testUgiAndKerberosUserGetsCleared() { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - processor.ugi = mock(UserGroupInformation.class); - processor.kerberosUserReference.set(new KerberosPasswordUser("user", "password")); - runner.run(); - assertNull(processor.ugi); - assertNull(processor.kerberosUserReference.get()); - } - - @Test - public void testUgiGetsSetIfSecure() throws AuthenticationFailedException, IOException { - when(hiveConf.get(SecurityUtil.HADOOP_SECURITY_AUTHENTICATION)).thenReturn(SecurityUtil.KERBEROS); - ugi = mock(UserGroupInformation.class); - when(hiveConfigurator.authenticate(eq(hiveConf), any(KerberosUser.class))).thenReturn(ugi); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(kerberosPropsWithFile.getKerberosKeytab(), "src/test/resources/fake.keytab"); - runner.setProperty(kerberosPropsWithFile.getKerberosPrincipal(), "principal"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - } - - @Test - public void testSetupBadPartitionColumns() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.assertValid(); - runner.setProperty(PutHiveStreaming.PARTITION_COLUMNS, "favorite_number,,"); - runner.setProperty(PutHiveStreaming.AUTOCREATE_PARTITIONS, "true"); - runner.assertNotValid(); - } - - @Test - public void testSetupWithKerberosAuthFailed() { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.HIVE_CONFIGURATION_RESOURCES, "src/test/resources/core-site-security.xml, src/test/resources/hive-site-security.xml"); - runner.setProperty(kerberosPropsWithFile.getKerberosPrincipal(), "test@REALM"); - runner.setProperty(kerberosPropsWithFile.getKerberosKeytab(), "src/test/resources/missing.keytab"); - assertThrows(AssertionError.class, () -> runner.run()); - } - - @Test - public void testSingleBatchInvalid() { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "2"); - runner.assertValid(); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "1"); - runner.assertNotValid(); - } - - @Test - public void onTrigger() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0); - assertEquals("1", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR)); - assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES)); - } - - @Test - public void onTriggerBadInput() { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.enqueue("I am not an Avro record".getBytes()); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1); - } - - @Test - public void onTriggerBadInputRollbackOnFailure() { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - runner.enqueue("I am not an Avro record".getBytes()); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - - @Test - public void onTriggerMultipleRecordsSingleTransaction() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.RECORDS_PER_TXN, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - Map user3 = new HashMap() { - { - put("name", "Matt"); - put("favorite_number", 3); - } - }; - final List> users = Arrays.asList(user1, user2, user3); - runner.enqueue(createAvroRecord(users)); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0); - assertOutputAvroRecords(users, resultFlowFile); - } - - @Test - public void onTriggerMultipleRecordsMultipleTransaction() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.RECORDS_PER_TXN, "2"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - Map user3 = new HashMap() { - { - put("name", "Matt"); - put("favorite_number", 3); - } - }; - final List> users = Arrays.asList(user1, user2, user3); - runner.enqueue(createAvroRecord(users)); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0); - assertOutputAvroRecords(users, resultFlowFile); - } - - @Test - public void onTriggerMultipleRecordsFailInMiddle() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.RECORDS_PER_TXN, "2"); - processor.setGenerateWriteFailure(true, 1); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - Map user3 = new HashMap() { - { - put("name", "Matt"); - put("favorite_number", 3); - } - }; - Map user4 = new HashMap() { - { - put("name", "Mike"); - put("favorite_number", 345); - } - }; - final List> users = Arrays.asList(user1, user2, user3, user4); - runner.enqueue(createAvroRecord(users)); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - - MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0); - assertOutputAvroRecords(Arrays.asList(user1, user3, user4), resultFlowFile); - - final MockFlowFile failedFlowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_FAILURE).get(0); - assertOutputAvroRecords(Arrays.asList(user2), failedFlowFile); - } - - @Test - public void onTriggerMultipleRecordsFailInMiddleRollbackOnFailure() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.RECORDS_PER_TXN, "2"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - processor.setGenerateWriteFailure(true, 1); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - Map user3 = new HashMap() { - { - put("name", "Matt"); - put("favorite_number", 3); - } - }; - runner.enqueue(createAvroRecord(Arrays.asList(user1, user2, user3))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerMultipleRecordsFailInMiddleRollbackOnFailureCommitted() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.RECORDS_PER_TXN, "2"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - // The first two records are committed, then an issue will happen at the 3rd record. - processor.setGenerateWriteFailure(true, 2); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - Map user3 = new HashMap() { - { - put("name", "Matt"); - put("favorite_number", 3); - } - }; - Map user4 = new HashMap() { - { - put("name", "Mike"); - put("favorite_number", 345); - } - }; - runner.enqueue(createAvroRecord(Arrays.asList(user1, user2, user3, user4))); - // ProcessException should NOT be thrown, because a Hive Transaction is already committed. - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - - // Assert transferred FlowFile. - assertOutputAvroRecords(Arrays.asList(user1, user2), runner.getFlowFilesForRelationship(REL_SUCCESS).get(0)); - - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - - } - - private void assertOutputAvroRecords(List> expectedRecords, MockFlowFile resultFlowFile) throws IOException { - assertEquals(String.valueOf(expectedRecords.size()), resultFlowFile.getAttribute(PutHiveStreaming.HIVE_STREAMING_RECORD_COUNT_ATTR)); - - final DataFileStream reader = new DataFileStream<>( - new ByteArrayInputStream(resultFlowFile.toByteArray()), - new GenericDatumReader()); - - Schema schema = reader.getSchema(); - - // Verify that the schema is preserved - assertTrue(schema.equals(new Schema.Parser().parse(new File("src/test/resources/user.avsc")))); - - GenericRecord record = null; - for (Map expectedRecord : expectedRecords) { - assertTrue(reader.hasNext()); - record = reader.next(record); - final String name = record.get("name").toString(); - final Integer favorite_number = (Integer) record.get("favorite_number"); - assertNotNull(name); - assertNotNull(favorite_number); - assertNull(record.get("favorite_color")); - assertNull(record.get("scale")); - - assertEquals(expectedRecord.get("name"), name); - assertEquals(expectedRecord.get("favorite_number"), favorite_number); - } - assertFalse(reader.hasNext()); - } - - @Test - public void onTriggerWithPartitionColumns() throws Exception { - runner.setVariable("metastore", "thrift://localhost:9083"); - runner.setVariable("database", "default"); - runner.setVariable("table", "users"); - runner.setVariable("partitions", "favorite_number, favorite_color"); - - runner.setProperty(PutHiveStreaming.METASTORE_URI, "${metastore}"); - runner.setProperty(PutHiveStreaming.DB_NAME, "${database}"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "${table}"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.PARTITION_COLUMNS, "${partitions}"); - runner.setProperty(PutHiveStreaming.AUTOCREATE_PARTITIONS, "true"); - - runner.assertValid(); - - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - put("favorite_color", "blue"); - } - }; - - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0); - assertEquals("1", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR)); - assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES)); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - } - - @Test - public void onTriggerWithPartitionColumnsNotInRecord() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.PARTITION_COLUMNS, "favorite_food"); - runner.setProperty(PutHiveStreaming.AUTOCREATE_PARTITIONS, "false"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - put("favorite_color", "blue"); - } - }; - - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - } - - @Test - public void onTriggerWithPartitionColumnsNotInRecordRollbackOnFailure() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.PARTITION_COLUMNS, "favorite_food"); - runner.setProperty(PutHiveStreaming.AUTOCREATE_PARTITIONS, "false"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - put("favorite_color", "blue"); - } - }; - - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithRetireWriters() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "2"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - for (int i = 0; i < 10; i++) { - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - } - runner.run(10); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 10); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - } - - @Test - public void onTriggerWithHeartbeat() throws Exception { - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.HEARTBEAT_INTERVAL, "1"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(1, false); - // Wait for a heartbeat - Thread.sleep(1000); - - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(1, true); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 2); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - } - - @Test - public void onTriggerWithConnectFailure() throws Exception { - processor.setGenerateConnectFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - } - - @Test - public void onTriggerWithConnectFailureRollbackOnFailure() throws Exception { - processor.setGenerateConnectFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithInterruptedException() throws Exception { - processor.setGenerateInterruptedExceptionOnCreateWriter(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1); - } - - @Test - public void onTriggerWithInterruptedExceptionRollbackOnFailure() throws Exception { - processor.setGenerateInterruptedExceptionOnCreateWriter(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - } - - @Test - public void onTriggerWithWriteFailure() throws Exception { - processor.setGenerateWriteFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - runner.enqueue(createAvroRecord(Arrays.asList(user1, user2))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_FAILURE).get(0); - assertEquals("2", flowFile.getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR)); - assertEquals("default.users", flowFile.getAttribute(ATTR_OUTPUT_TABLES)); - } - - @Test - public void onTriggerWithWriteFailureRollbackOnFailure() throws Exception { - processor.setGenerateWriteFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - Map user2 = new HashMap() { - { - put("name", "Mary"); - put("favorite_number", 42); - } - }; - runner.enqueue(createAvroRecord(Arrays.asList(user1, user2))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithSerializationError() throws Exception { - processor.setGenerateSerializationError(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1); - } - - @Test - public void onTriggerWithSerializationErrorRollbackOnFailure() throws Exception { - processor.setGenerateSerializationError(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithCommitFailure() throws Exception { - processor.setGenerateCommitFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1); - } - - @Test - public void onTriggerWithCommitFailureRollbackOnFailure() throws Exception { - processor.setGenerateCommitFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithTransactionFailure() throws Exception { - processor.setGenerateTransactionFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1); - } - - @Test - public void onTriggerWithTransactionFailureRollbackOnFailure() throws Exception { - processor.setGenerateTransactionFailure(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - runner.setProperty(PutHiveStreaming.ROLLBACK_ON_FAILURE, "true"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - AssertionError ae = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(ae.getCause() instanceof ProcessException); - - runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0); - runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0); - runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0); - // Assert incoming FlowFile stays in input queue. - assertEquals(1, runner.getQueueSize().getObjectCount()); - } - - @Test - public void onTriggerWithExceptionOnFlushAndClose() throws Exception { - processor.setGenerateExceptionOnFlushAndClose(true); - runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083"); - runner.setProperty(PutHiveStreaming.DB_NAME, "default"); - runner.setProperty(PutHiveStreaming.TABLE_NAME, "users"); - runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100"); - Map user1 = new HashMap() { - { - put("name", "Joe"); - put("favorite_number", 146); - } - }; - runner.enqueue(createAvroRecord(Collections.singletonList(user1))); - runner.run(); - } - - @Test - public void cleanup() throws Exception { - processor.cleanup(); - } - - @Test - public void flushAllWriters() throws Exception { - - } - - @Test - public void abortAndCloseWriters() throws Exception { - - } - - private byte[] createAvroRecord(List> records) throws IOException { - final Schema schema = new Schema.Parser().parse(new File("src/test/resources/user.avsc")); - - List users = new LinkedList<>(); - for (Map record : records) { - final GenericRecord user = new GenericData.Record(schema); - user.put("name", record.get("name")); - user.put("favorite_number", record.get("favorite_number")); - user.put("favorite_color", record.get("favorite_color")); - users.add(user); - } - final DatumWriter datumWriter = new GenericDatumWriter<>(schema); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { - dataFileWriter.create(schema, out); - for (final GenericRecord user : users) { - dataFileWriter.append(user); - } - } - return out.toByteArray(); - - } - - private class MockPutHiveStreaming extends PutHiveStreaming { - - private KerberosProperties kerberosProperties; - private boolean generateConnectFailure = false; - private boolean generateInterruptedExceptionOnCreateWriter = false; - private boolean generateWriteFailure = false; - private Integer generateWriteFailureRecordIndex; - private boolean generateSerializationError = false; - private boolean generateCommitFailure = false; - private boolean generateTransactionFailure = false; - private boolean generateExceptionOnFlushAndClose = false; - private HiveEndPoint hiveEndPoint = mock(HiveEndPoint.class); - - @Override - public KerberosProperties getKerberosProperties() { - return this.kerberosProperties; - } - - public void setKerberosProperties(KerberosProperties kerberosProperties) { - this.kerberosProperties = kerberosProperties; - } - - @Override - public HiveEndPoint makeHiveEndPoint(List partitionValues, HiveOptions hiveOptions) { - return hiveEndPoint; - } - - @Override - protected HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options) - throws HiveWriter.ConnectFailure, InterruptedException { - if (generateConnectFailure) { - throw new HiveWriter.ConnectFailure(endPoint, new Exception()); - } - if (generateInterruptedExceptionOnCreateWriter) { - throw new InterruptedException(); - } - MockHiveWriter hiveWriter = new MockHiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(), options.getCallTimeOut(), callTimeoutPool, ugi, hiveConfig); - hiveWriter.setGenerateWriteFailure(generateWriteFailure, generateWriteFailureRecordIndex); - hiveWriter.setGenerateSerializationError(generateSerializationError); - hiveWriter.setGenerateCommitFailure(generateCommitFailure); - hiveWriter.setGenerateTransactionFailure(generateTransactionFailure); - hiveWriter.setGenerateExceptionOnFlushAndClose(generateExceptionOnFlushAndClose); - return hiveWriter; - } - - public void setGenerateConnectFailure(boolean generateConnectFailure) { - this.generateConnectFailure = generateConnectFailure; - } - - public void setGenerateInterruptedExceptionOnCreateWriter(boolean generateInterruptedExceptionOnCreateWriter) { - this.generateInterruptedExceptionOnCreateWriter = generateInterruptedExceptionOnCreateWriter; - } - - public void setGenerateWriteFailure(boolean generateWriteFailure) { - this.generateWriteFailure = generateWriteFailure; - } - - public void setGenerateWriteFailure(boolean generateWriteFailure, int generateWriteFailureRecordIndex) { - this.generateWriteFailure = generateWriteFailure; - this.generateWriteFailureRecordIndex = generateWriteFailureRecordIndex; - } - - public void setGenerateSerializationError(boolean generateSerializationError) { - this.generateSerializationError = generateSerializationError; - } - - public void setGenerateCommitFailure(boolean generateCommitFailure) { - this.generateCommitFailure = generateCommitFailure; - } - - public void setGenerateTransactionFailure(boolean generateTransactionFailure) { - this.generateTransactionFailure = generateTransactionFailure; - } - - public void setGenerateExceptionOnFlushAndClose(boolean generateExceptionOnFlushAndClose) { - this.generateExceptionOnFlushAndClose = generateExceptionOnFlushAndClose; - } - - @Override - UserGroupInformation getUgi() { - return ugi; - } - } - - private class MockHiveWriter extends HiveWriter { - - private boolean generateWriteFailure = false; - private Integer generateWriteFailureRecordIndex; - private boolean generateSerializationError = false; - private boolean generateCommitFailure = false; - private boolean generateTransactionFailure = false; - private boolean generateExceptionOnFlushAndClose = false; - private int writeAttemptCount = 0; - private int totalRecords = 0; - - private HiveEndPoint endPoint; - - public MockHiveWriter(HiveEndPoint endPoint, int txnsPerBatch, boolean autoCreatePartitions, - long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveConf hiveConf) - throws InterruptedException, ConnectFailure { - super(endPoint, txnsPerBatch, autoCreatePartitions, callTimeout, callTimeoutPool, ugi, hiveConf); - assertEquals(TestPutHiveStreaming.this.ugi, ugi); - this.endPoint = endPoint; - } - - @Override - public synchronized void write(byte[] record) throws WriteFailure, SerializationError, InterruptedException { - try { - if (generateWriteFailure - && (generateWriteFailureRecordIndex == null || writeAttemptCount == generateWriteFailureRecordIndex)) { - throw new WriteFailure(endPoint, 1L, new Exception()); - } - if (generateSerializationError) { - throw new SerializationError("Test Serialization Error", new Exception()); - } - totalRecords++; - } finally { - writeAttemptCount++; - } - } - - public void setGenerateWriteFailure(boolean generateWriteFailure, Integer generateWriteFailureRecordIndex) { - this.generateWriteFailure = generateWriteFailure; - this.generateWriteFailureRecordIndex = generateWriteFailureRecordIndex; - } - - public void setGenerateSerializationError(boolean generateSerializationError) { - this.generateSerializationError = generateSerializationError; - } - - public void setGenerateCommitFailure(boolean generateCommitFailure) { - this.generateCommitFailure = generateCommitFailure; - } - - public void setGenerateTransactionFailure(boolean generateTransactionFailure) { - this.generateTransactionFailure = generateTransactionFailure; - } - - public void setGenerateExceptionOnFlushAndClose(boolean generateExceptionOnFlushAndClose) { - this.generateExceptionOnFlushAndClose = generateExceptionOnFlushAndClose; - } - - @Override - protected RecordWriter getRecordWriter(HiveEndPoint endPoint, UserGroupInformation ugi, HiveConf conf) throws StreamingException { - assertEquals(hiveConf, conf); - return mock(RecordWriter.class); - } - - @Override - protected StreamingConnection newConnection(HiveEndPoint endPoint, boolean autoCreatePartitions, HiveConf conf, UserGroupInformation ugi) throws InterruptedException, ConnectFailure { - StreamingConnection connection = mock(StreamingConnection.class); - assertEquals(hiveConf, conf); - return connection; - } - - @Override - public void flush(boolean rollToNext) throws CommitFailure, TxnBatchFailure, TxnFailure, InterruptedException { - if (generateCommitFailure) { - throw new HiveWriter.CommitFailure(endPoint, 1L, new Exception()); - } - if (generateTransactionFailure) { - throw new HiveWriter.TxnFailure(mock(TransactionBatch.class), new Exception()); - } - } - - @Override - public void heartBeat() throws InterruptedException { - - } - - @Override - public void flushAndClose() throws TxnBatchFailure, TxnFailure, CommitFailure, IOException, InterruptedException { - if (generateExceptionOnFlushAndClose) { - throw new IOException(); - } - } - - @Override - public void close() throws IOException, InterruptedException { - - } - - @Override - public void abort() throws StreamingException, TxnBatchFailure, InterruptedException { - - } - - @Override - protected void closeConnection() throws InterruptedException { - // Empty - } - - @Override - protected void commitTxn() throws CommitFailure, InterruptedException { - // Empty - } - - @Override - protected TransactionBatch nextTxnBatch(RecordWriter recordWriter) throws InterruptedException, TxnBatchFailure { - TransactionBatch txnBatch = mock(TransactionBatch.class); - return txnBatch; - } - - @Override - protected void closeTxnBatch() throws InterruptedException { - // Empty - } - - @Override - protected void abortTxn() throws InterruptedException { - // Empty - } - - @Override - protected void nextTxn(boolean rollToNext) throws StreamingException, InterruptedException, TxnBatchFailure { - // Empty - } - - @Override - public int getTotalRecords() { - return totalRecords; - } - } - -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHiveQL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHiveQL.java deleted file mode 100644 index 02f3d8716a..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHiveQL.java +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.avro.file.DataFileStream; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumReader; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.provenance.ProvenanceEventRecord; -import org.apache.nifi.provenance.ProvenanceEventType; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.apache.nifi.util.hive.HiveJdbcCommon; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import static org.apache.nifi.processors.hive.SelectHiveQL.HIVEQL_OUTPUT_FORMAT; -import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; -import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestSelectHiveQL { - - private static final Logger LOGGER; - private final static String MAX_ROWS_KEY = "maxRows"; - private final int NUM_OF_ROWS = 100; - - - static { - System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info"); - System.setProperty("org.slf4j.simpleLogger.showDateTime", "true"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHiveQL", "debug"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHiveQL", "debug"); - LOGGER = LoggerFactory.getLogger(TestSelectHiveQL.class); - } - - private final static String DB_LOCATION = "target/db"; - - private final static String QUERY_WITH_EL = "select " - + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" - + " from persons PER" - + " where PER.ID > ${person.id}"; - - private final static String QUERY_WITHOUT_EL = "select " - + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" - + " from persons PER" - + " where PER.ID > 10"; - - - @BeforeAll - public static void setupClass() { - System.setProperty("derby.stream.error.file", "target/derby.log"); - } - - private TestRunner runner; - - @BeforeEach - public void setup() throws InitializationException { - final DBCPService dbcp = new DBCPServiceSimpleImpl(); - final Map dbcpProperties = new HashMap<>(); - - runner = TestRunners.newTestRunner(SelectHiveQL.class); - runner.addControllerService("dbcp", dbcp, dbcpProperties); - runner.enableControllerService(dbcp); - runner.setProperty(SelectHiveQL.HIVE_DBCP_SERVICE, "dbcp"); - } - - @Test - public void testIncomingConnectionWithNoFlowFile() throws InitializationException { - runner.setIncomingConnection(true); - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons"); - runner.run(); - runner.assertTransferCount(SelectHiveQL.REL_SUCCESS, 0); - runner.assertTransferCount(SelectHiveQL.REL_FAILURE, 0); - } - - @Test - public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException { - runner.setIncomingConnection(false); - invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro"); - - final List provenanceEvents = runner.getProvenanceEvents(); - final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); - assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType()); - assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri()); - } - - @Test - public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITH_EL, true, "Avro"); - - final List provenanceEvents = runner.getProvenanceEvents(); - assertEquals(4, provenanceEvents.size()); - - final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); - assertEquals(ProvenanceEventType.FORK, provenance0.getEventType()); - - final ProvenanceEventRecord provenance1 = provenanceEvents.get(1); - assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType()); - assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri()); - - final ProvenanceEventRecord provenance2 = provenanceEvents.get(2); - assertEquals(ProvenanceEventType.FORK, provenance2.getEventType()); - - // The last one was removed as empty - final ProvenanceEventRecord provenance3 = provenanceEvents.get(3); - assertEquals(ProvenanceEventType.DROP, provenance3.getEventType()); - } - - - @Test - public void testWithNullIntColumn() throws SQLException { - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_NULL_INT"); - } catch (final SQLException sqle) { - // Nothing to do, probably means the table didn't exist - } - - stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))"); - - stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)"); - stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)"); - - runner.setIncomingConnection(false); - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT"); - runner.run(); - - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - final List flowfiles = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS); - flowfiles.get(0).assertAttributeEquals(SelectHiveQL.RESULT_ROW_COUNT, "2"); - final long executionTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - } - - @Test - public void testWithSqlException() throws SQLException { - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_NO_ROWS"); - } catch (final SQLException sqle) { - // Nothing to do, probably means the table didn't exist - } - - stmt.execute("create table TEST_NO_ROWS (id integer)"); - - runner.setIncomingConnection(false); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS"); - runner.run(); - - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPreQueriesNoIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, false, CSV, - "select 'no exception' from persons; select exception from persons", - null); - - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPreQueriesWithIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, true, CSV, - "select 'no exception' from persons; select exception from persons", - null); - - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPostQueriesNoIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, false, CSV, - null, - "select 'no exception' from persons; select exception from persons"); - - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPostQueriesWithIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, true, CSV, - null, - "select 'no exception' from persons; select exception from persons"); - - // with incoming connections, it should be rolled back - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - } - - @Test - public void testWithBadSQL() throws SQLException { - final String BAD_SQL = "create table TEST_NO_ROWS (id integer)"; - - // Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent) - runner.setIncomingConnection(true); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.enqueue(BAD_SQL); - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_FAILURE).get(0); - flowFile.assertContentEquals(BAD_SQL); - flowFile.assertAttributeEquals("parentIds", null); - runner.clearTransferState(); - - // Test with no incoming flow file (an empty flow file is transferred) - runner.setIncomingConnection(false); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, BAD_SQL); - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1); - flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_FAILURE).get(0); - flowFile.assertContentEquals(""); - } - - @Test - public void invokeOnTriggerWithCsv() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV); - } - - @Test - public void invokeOnTriggerWithAvro() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO); - } - - @Test - public void invokeOnTriggerWithValidPreQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - "select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format. - null); - } - - @Test - public void invokeOnTriggerWithValidPostQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - null, - //should not be 'select'. But Derby driver doesn't support "set param=val" format, - //so just providing any "compilable" query. - " select '4' from persons; \nselect '5' from persons"); - } - - @Test - public void invokeOnTriggerWithValidPrePostQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - //should not be 'select'. But Derby driver doesn't support "set param=val" format, - //so just providing any "compilable" query. - "select '1' from persons; select '2' from persons", - " select '4' from persons; \nselect '5' from persons"); - } - - - public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null); - } - - public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, - String preQueries, String postQueries) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - final List flowfiles = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS); - MockFlowFile flowFile = flowfiles.get(0); - final InputStream in = new ByteArrayInputStream(flowFile.toByteArray()); - long recordsFromStream = 0; - if (AVRO.equals(outputFormat)) { - assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); - final DatumReader datumReader = new GenericDatumReader<>(); - try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { - GenericRecord record = null; - while (dataFileReader.hasNext()) { - // Reuse record object by passing it to next(). This saves us from - // allocating and garbage collecting many objects for files with - // many items. - record = dataFileReader.next(record); - recordsFromStream++; - } - } - } else { - assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - - String headerRow = br.readLine(); - // Derby capitalizes column names - assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow); - - // Validate rows - String line; - while ((line = br.readLine()) != null) { - recordsFromStream++; - String[] values = line.split(","); - if (recordsFromStream < (NUM_OF_ROWS - 10)) { - assertEquals(3, values.length); - assertTrue(values[1].startsWith("\"")); - assertTrue(values[1].endsWith("\"")); - } else { - assertEquals(2, values.length); // Middle value is null - } - } - } - - final long executionTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(NUM_OF_ROWS - 10, recordsFromStream); - assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHiveQL.RESULT_ROW_COUNT))); - assertEquals(durationTime, fetchTime + executionTime); - flowFile.assertAttributeEquals(AbstractHiveQLProcessor.ATTR_INPUT_TABLES, "persons"); - } - - public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, - String preQueries, String postQueries) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection(); - final Statement stmt = con.createStatement(); - try { - stmt.execute("drop table persons"); - } catch (final SQLException sqle) { - // Nothing to do here, the table didn't exist - } - - stmt.execute("create table persons (id integer, name varchar(100), code integer)"); - Random rng = new Random(53496); - stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")"); - for (int i = 2; i < NUM_OF_ROWS; i++) { - stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")"); - } - stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)"); - - LOGGER.info("test data loaded"); - - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, query); - runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat); - if (preQueries != null) { - runner.setProperty(SelectHiveQL.HIVEQL_PRE_QUERY, preQueries); - } - if (postQueries != null) { - runner.setProperty(SelectHiveQL.HIVEQL_POST_QUERY, postQueries); - } - - if (incomingFlowFile) { - // incoming FlowFile content is not used, but attributes are used - final Map attributes = new HashMap<>(); - attributes.put("person.id", "10"); - runner.enqueue("Hello".getBytes(), attributes); - } - - runner.setIncomingConnection(incomingFlowFile); - runner.run(); - - return runner; - } - - @Test - public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(false); - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE"); - runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); - runner.setVariable(MAX_ROWS_KEY, "9"); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 12); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - //ensure all but the last file have 9 records each - for (int ff = 0; ff < 11; ff++) { - mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(ff); - final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(9, getNumberOfRecordsFromStream(in)); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - } - - //last file should have 1 record - mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(11); - final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(1, getNumberOfRecordsFromStream(in)); - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - runner.clearTransferState(); - } - - @Test - public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException { - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(true); - runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); - runner.setVariable(MAX_ROWS_KEY, "9"); - - Map attributes = new HashMap(); - attributes.put("hiveql.args.1.value", "1"); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes ); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(0); - final long executionTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - // Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s) - flowFile.assertAttributeEquals("hiveql.args.1.value", "1"); - flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - runner.clearTransferState(); - } - - @Test - public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(true); - runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV); - - runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap() {{ - put(MAX_ROWS_KEY, "9"); - }}); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 12); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - //ensure all but the last file have 9 records (10 lines = 9 records + header) each - for (int ff = 0; ff < 11; ff++) { - mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(ff); - final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - in = new ByteArrayInputStream(mff.toByteArray()); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - assertEquals(10, br.lines().count()); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - } - - //last file should have 1 record (2 lines = 1 record + header) - mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(11); - final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - in = new ByteArrayInputStream(mff.toByteArray()); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - assertEquals(2, br.lines().count()); - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - runner.clearTransferState(); - } - - @Test - public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(false); - runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE"); - runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "9"); - Integer maxFragments = 3; - runner.setProperty(SelectHiveQL.MAX_FRAGMENTS, maxFragments.toString()); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, maxFragments); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME); - runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME); - - for (int i = 0; i < maxFragments; i++) { - mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(i); - final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME)); - final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME)); - final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION)); - - assertEquals(durationTime, fetchTime + executionTime); - - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(9, getNumberOfRecordsFromStream(in)); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(i), mff.getAttribute("fragment.index")); - assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count")); - } - - runner.clearTransferState(); - } - - private long getNumberOfRecordsFromStream(InputStream in) throws IOException { - final DatumReader datumReader = new GenericDatumReader<>(); - try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { - GenericRecord record = null; - long recordsFromStream = 0; - while (dataFileReader.hasNext()) { - // Reuse record object by passing it to next(). This saves us from - // allocating and garbage collecting many objects for files with - // many items. - record = dataFileReader.next(record); - recordsFromStream += 1; - } - - return recordsFromStream; - } - } - - /** - * Simple implementation only for SelectHiveQL processor testing. - */ - private class DBCPServiceSimpleImpl extends AbstractControllerService implements HiveDBCPService { - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); - return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true"); - } catch (final Exception e) { - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:derby:" + DB_LOCATION + ";create=true"; - } - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHiveTable.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHiveTable.java deleted file mode 100644 index 9905f8938f..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHiveTable.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.HiveDBCPService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.schema.access.SchemaNotFoundException; -import org.apache.nifi.serialization.RecordReader; -import org.apache.nifi.serialization.SimpleRecordSchema; -import org.apache.nifi.serialization.record.MockRecordParser; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordFieldType; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.stubbing.Answer; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.BiFunction; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -@DisabledOnOs(OS.WINDOWS) -public class TestUpdateHiveTable { - - private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml"; - private static final String TARGET_HIVE = "target/hive"; - - private static final String[] SHOW_TABLES_COLUMN_NAMES = new String[]{"tab_name"}; - private static final String[][] SHOW_TABLES_RESULTSET = new String[][]{ - new String[]{"messages"}, - new String[]{"users"}, - }; - - private static final String[] DESC_MESSAGES_TABLE_COLUMN_NAMES = new String[]{"id", "msg"}; - private static final String[][] DESC_MESSAGES_TABLE_RESULTSET = new String[][]{ - new String[]{"# col_name", "data_type", "comment"}, - new String[]{"", null, null}, - new String[]{"id", "int", ""}, - new String[]{"msg", "string", ""}, - new String[]{"", null, null}, - new String[]{"# Partition Information", null, null}, - new String[]{"# col_name", "data_type", "comment"}, - new String[]{"", null, null}, - new String[]{"continent", "string", ""}, - new String[]{"country", "string", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages", null} - }; - - private static final String[] DESC_USERS_TABLE_COLUMN_NAMES = new String[]{"name", "favorite_number", "favorite_color", "scale"}; - private static final String[][] DESC_USERS_TABLE_RESULTSET = new String[][]{ - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users", null} - }; - private static final String[][] DESC_EXTERNAL_USERS_TABLE_RESULTSET = new String[][]{ - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/path/to/users", null} - }; - - private static final String[] DESC_NEW_TABLE_COLUMN_NAMES = DESC_USERS_TABLE_COLUMN_NAMES; - private static final String[][] DESC_NEW_TABLE_RESULTSET = new String[][]{ - new String[]{"# col_name", "data_type", "comment"}, - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable", null} - }; - - private TestRunner runner; - private MockUpdateHiveTable processor; - - @BeforeEach - public void setUp() { - - Configuration testConf = new Configuration(); - testConf.addResource(new Path(TEST_CONF_PATH)); - - // Delete any temp files from previous tests - try { - FileUtils.deleteDirectory(new File(TARGET_HIVE)); - } catch (IOException ioe) { - // Do nothing, directory may not have existed - } - - processor = new MockUpdateHiveTable(); - } - - private void configure(final UpdateHiveTable processor, final int numUsers) throws InitializationException { - configure(processor, numUsers, false, -1); - } - - private void configure(final UpdateHiveTable processor, final int numUsers, boolean failOnCreateReader, int failAfter) throws InitializationException { - configure(processor, numUsers, failOnCreateReader, failAfter, null); - } - - private void configure(final UpdateHiveTable processor, final int numUsers, final boolean failOnCreateReader, final int failAfter, - final BiFunction recordGenerator) throws InitializationException { - runner = TestRunners.newTestRunner(processor); - MockRecordParser readerFactory = new MockRecordParser() { - @Override - public RecordReader createRecordReader(Map variables, InputStream in, long inputLength, ComponentLog logger) throws IOException, SchemaNotFoundException { - if (failOnCreateReader) { - throw new SchemaNotFoundException("test"); - } - return super.createRecordReader(variables, in, inputLength, logger); - } - }; - List fields = Arrays.asList( - new RecordField("name", RecordFieldType.STRING.getDataType()), - new RecordField("favorite_number", RecordFieldType.INT.getDataType()), - new RecordField("favorite_color", RecordFieldType.STRING.getDataType()), - new RecordField("scale", RecordFieldType.DOUBLE.getDataType()) - ); - final SimpleRecordSchema recordSchema = new SimpleRecordSchema(fields); - for (final RecordField recordField : recordSchema.getFields()) { - readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable()); - } - - if (recordGenerator == null) { - for (int i = 0; i < numUsers; i++) { - readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0); - } - } else { - recordGenerator.apply(numUsers, readerFactory); - } - - readerFactory.failAfter(failAfter); - - runner.addControllerService("mock-reader-factory", readerFactory); - runner.enableControllerService(readerFactory); - - runner.setProperty(UpdateHiveTable.RECORD_READER, "mock-reader-factory"); - } - - @Test - public void testSetup(@TempDir java.nio.file.Path tempDir) throws Exception { - configure(processor, 0); - runner.assertNotValid(); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockHiveConnectionPool(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - runner.assertNotValid(); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "users"); - runner.assertValid(); - runner.run(); - } - - - @Test - public void testNoStatementsExecuted() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "users"); - final MockHiveConnectionPool service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "continent, country"); - HashMap attrs = new HashMap<>(); - attrs.put("continent", "Asia"); - attrs.put("country", "China"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "users"); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users"); - assertTrue(service.getExecutedStatements().isEmpty()); - } - - @Test - public void testCreateManagedTable() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "_newTable"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "_newTable"); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET", - statements.get(0)); - } - - @Test - public void testCreateManagedTableWithPartition() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "age int"); - runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "_newTable"); - attrs.put("age", "23"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "_newTable"); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) PARTITIONED BY (`age` int) STORED AS PARQUET", - statements.get(0)); - } - - @Test - public void testCreateExternalTable() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHiveTable.TABLE_MANAGEMENT_STRATEGY, UpdateHiveTable.EXTERNAL_TABLE); - runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("ext_users"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - runner.assertNotValid(); // Needs location specified - runner.setProperty(UpdateHiveTable.EXTERNAL_TABLE_LOCATION, "/path/to/users"); - runner.assertValid(); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "ext_users"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "ext_users"); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/path/to/users"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS `ext_users` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET " - + "LOCATION '/path/to/users'", - statements.get(0)); - } - - @Test - public void testAddColumnsAndPartition() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "messages"); - final MockHiveConnectionPool service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "continent, country"); - HashMap attrs = new HashMap<>(); - attrs.put("continent", "Asia"); - attrs.put("country", "China"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "messages"); - flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages/continent=Asia/country=China"); - List statements = service.getExecutedStatements(); - assertEquals(2, statements.size()); - // All columns from users table/data should be added to the table, and a new partition should be added - assertEquals("ALTER TABLE `messages` ADD COLUMNS (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE)", - statements.get(0)); - assertEquals("ALTER TABLE `messages` ADD IF NOT EXISTS PARTITION (`continent`='Asia', `country`='China')", - statements.get(1)); - } - - @Test - public void testMissingPartitionValues() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHiveTable.TABLE_NAME, "messages"); - final DBCPService service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue(new byte[0]); - runner.run(); - - runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 0); - runner.assertTransferCount(UpdateHiveTable.REL_FAILURE, 1); - } - - private static final class MockUpdateHiveTable extends UpdateHiveTable { - } - - /** - * Simple implementation only for testing purposes - */ - private static class MockHiveConnectionPool extends AbstractControllerService implements HiveDBCPService { - private final String dbLocation; - - private final List executedStatements = new ArrayList<>(); - - MockHiveConnectionPool(final String dbLocation) { - this.dbLocation = dbLocation; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Connection conn = mock(Connection.class); - Statement s = mock(Statement.class); - when(conn.createStatement()).thenReturn(s); - when(s.executeQuery(anyString())).thenAnswer((Answer) invocation -> { - final String query = invocation.getArgument(0); - if ("SHOW TABLES".equals(query)) { - return new MockResultSet(SHOW_TABLES_COLUMN_NAMES, SHOW_TABLES_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `messages`".equals(query)) { - return new MockResultSet(DESC_MESSAGES_TABLE_COLUMN_NAMES, DESC_MESSAGES_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `users`".equals(query)) { - return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_USERS_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `ext_users`".equals(query)) { - return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_EXTERNAL_USERS_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `_newTable`".equals(query)) { - return new MockResultSet(DESC_NEW_TABLE_COLUMN_NAMES, DESC_NEW_TABLE_RESULTSET).createResultSet(); - } else { - return new MockResultSet(new String[]{}, new String[][]{new String[]{}}).createResultSet(); - } - }); - when(s.execute(anyString())).thenAnswer((Answer) invocation -> { - executedStatements.add(invocation.getArgument(0)); - return false; - }); - return conn; - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:fake:" + dbLocation; - } - - List getExecutedStatements() { - return executedStatements; - } - } - - private static class MockResultSet { - String[] colNames; - String[][] data; - int currentRow; - - MockResultSet(String[] colNames, String[][] data) { - this.colNames = colNames; - this.data = data; - currentRow = 0; - } - - ResultSet createResultSet() throws SQLException { - ResultSet rs = mock(ResultSet.class); - when(rs.next()).thenAnswer((Answer) invocation -> (data != null) && (++currentRow <= data.length)); - when(rs.getString(anyInt())).thenAnswer((Answer) invocation -> { - final int index = invocation.getArgument(0); - if (index < 1) { - throw new SQLException("Columns start with index 1"); - } - if (currentRow > data.length) { - throw new SQLException("This result set is already closed"); - } - return data[currentRow - 1][index - 1]; - }); - - return rs; - } - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/hive/HiveWriterTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/hive/HiveWriterTest.java deleted file mode 100644 index 34282fe411..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/hive/HiveWriterTest.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.util.hive; - -import com.google.common.util.concurrent.UncheckedExecutionException; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.hcatalog.streaming.HiveEndPoint; -import org.apache.hive.hcatalog.streaming.InvalidTable; -import org.apache.hive.hcatalog.streaming.RecordWriter; -import org.apache.hive.hcatalog.streaming.StreamingConnection; -import org.apache.hive.hcatalog.streaming.StreamingException; -import org.apache.hive.hcatalog.streaming.TransactionBatch; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.stubbing.Answer; - -import java.io.IOException; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.isA; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class HiveWriterTest { - private HiveEndPoint hiveEndPoint; - private int txnsPerBatch; - private boolean autoCreatePartitions; - private int callTimeout; - private ExecutorService executorService; - private UserGroupInformation userGroupInformation; - private HiveConf hiveConf; - private HiveWriter hiveWriter; - private StreamingConnection streamingConnection; - private RecordWriter recordWriter; - private Callable recordWriterCallable; - private TransactionBatch transactionBatch; - - @BeforeEach - public void setup() throws Exception { - hiveEndPoint = mock(HiveEndPoint.class); - txnsPerBatch = 100; - autoCreatePartitions = true; - callTimeout = 0; - executorService = mock(ExecutorService.class); - streamingConnection = mock(StreamingConnection.class); - transactionBatch = mock(TransactionBatch.class); - userGroupInformation = mock(UserGroupInformation.class); - hiveConf = mock(HiveConf.class); - recordWriter = mock(RecordWriter.class); - recordWriterCallable = mock(Callable.class); - when(recordWriterCallable.call()).thenReturn(recordWriter); - - when(hiveEndPoint.newConnection(autoCreatePartitions, hiveConf, userGroupInformation)).thenReturn(streamingConnection); - when(streamingConnection.fetchTransactionBatch(txnsPerBatch, recordWriter)).thenReturn(transactionBatch); - when(executorService.submit(isA(Callable.class))).thenAnswer(invocation -> { - Future future = mock(Future.class); - Answer answer = i -> ((Callable) invocation.getArguments()[0]).call(); - when(future.get()).thenAnswer(answer); - when(future.get(anyLong(), any(TimeUnit.class))).thenAnswer(answer); - return future; - }); - when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> { - try { - try { - return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run(); - } catch (UncheckedExecutionException e) { - // Creation of strict json writer will fail due to external deps, this gives us chance to catch it - for (StackTraceElement stackTraceElement : e.getStackTrace()) { - if (stackTraceElement.toString().startsWith("org.apache.hive.hcatalog.streaming.StrictJsonWriter.(")) { - return recordWriterCallable.call(); - } - } - throw e; - } - } catch (IOException | Error | RuntimeException | InterruptedException e) { - throw e; - } catch (Throwable e) { - throw new UndeclaredThrowableException(e); - } - }); - - initWriter(); - } - - private void initWriter() throws Exception { - hiveWriter = new HiveWriter(hiveEndPoint, txnsPerBatch, autoCreatePartitions, callTimeout, executorService, userGroupInformation, hiveConf); - } - - @Test - public void testNormal() { - assertNotNull(hiveWriter); - } - - @Test - public void testNewConnectionInvalidTable() throws Exception { - hiveEndPoint = mock(HiveEndPoint.class); - InvalidTable invalidTable = new InvalidTable("badDb", "badTable"); - when(hiveEndPoint.newConnection(autoCreatePartitions, hiveConf, userGroupInformation)).thenThrow(invalidTable); - HiveWriter.ConnectFailure e = assertThrows(HiveWriter.ConnectFailure.class, () -> initWriter()); - assertEquals(invalidTable, e.getCause()); - } - - @Test - public void testRecordWriterStreamingException() throws Exception { - recordWriterCallable = mock(Callable.class); - StreamingException streamingException = new StreamingException("Test Exception"); - when(recordWriterCallable.call()).thenThrow(streamingException); - HiveWriter.ConnectFailure e = assertThrows(HiveWriter.ConnectFailure.class, () -> initWriter()); - assertEquals(streamingException, e.getCause()); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java deleted file mode 100644 index d71141f601..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/java/org/apache/nifi/util/orc/TestNiFiOrcUtils.java +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.orc; - - -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.generic.GenericData; -import org.apache.avro.util.Utf8; -import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.UnionObject; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.junit.jupiter.api.Test; - -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Unit tests for the NiFiOrcUtils helper class - */ -public class TestNiFiOrcUtils { - - @Test - public void test_getOrcField_primitive() throws Exception { - // Expected ORC types - TypeInfo[] expectedTypes = { - TypeInfoFactory.getPrimitiveTypeInfo("int"), - TypeInfoFactory.getPrimitiveTypeInfo("bigint"), - TypeInfoFactory.getPrimitiveTypeInfo("boolean"), - TypeInfoFactory.getPrimitiveTypeInfo("float"), - TypeInfoFactory.getPrimitiveTypeInfo("double"), - TypeInfoFactory.getPrimitiveTypeInfo("binary"), - TypeInfoFactory.getPrimitiveTypeInfo("string") - }; - - // Build a fake Avro record with all types - Schema testSchema = buildPrimitiveAvroSchema(); - List fields = testSchema.getFields(); - for (int i = 0; i < fields.size(); i++) { - assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema())); - } - - } - - @Test - public void test_getOrcField_union_optional_type() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("union").type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema()); - assertEquals(TypeInfoCreator.createBoolean(), orcType); - } - - @Test - public void test_getOrcField_union() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema()); - assertEquals( - TypeInfoFactory.getUnionTypeInfo(Arrays.asList( - TypeInfoCreator.createInt(), - TypeInfoCreator.createBoolean())), - orcType); - } - - @Test - public void test_getOrcField_map() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("map").type().map().values().doubleType().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema()); - assertEquals( - TypeInfoFactory.getMapTypeInfo( - TypeInfoCreator.createString(), - TypeInfoCreator.createDouble()), - orcType); - } - - @Test - public void test_getOrcField_nested_map() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("map").type().map().values().map().values().doubleType().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema()); - assertEquals( - TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), - TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())), - orcType); - } - - @Test - public void test_getOrcField_array() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("array").type().array().items().longType().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema()); - assertEquals( - TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()), - orcType); - } - - @Test - public void test_getOrcField_complex_array() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("array").type().array().items().map().values().floatType().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema()); - assertEquals( - TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createFloat())), - orcType); - } - - @Test - public void test_getOrcField_record() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("int").type().intType().noDefault(); - builder.name("long").type().longType().longDefault(1L); - builder.name("array").type().array().items().stringType().noDefault(); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema); - assertEquals( - TypeInfoFactory.getStructTypeInfo( - Arrays.asList("int", "long", "array"), - Arrays.asList( - TypeInfoCreator.createInt(), - TypeInfoCreator.createLong(), - TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))), - orcType); - } - - @Test - public void test_getOrcField_enum() throws Exception { - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("testRecord").namespace("any.data").fields(); - builder.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a"); - Schema testSchema = builder.endRecord(); - TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema()); - assertEquals(TypeInfoCreator.createString(), orcType); - } - - @Test - public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() throws Exception { - // Expected ORC types - TypeInfo[] expectedTypes = { - TypeInfoCreator.createInt(), - TypeInfoCreator.createLong(), - TypeInfoCreator.createBoolean(), - TypeInfoCreator.createFloat(), - TypeInfoCreator.createDouble(), - TypeInfoCreator.createBinary(), - TypeInfoCreator.createString(), - }; - - Schema testSchema = buildPrimitiveAvroSchema(); - List fields = testSchema.getFields(); - for (int i = 0; i < fields.size(); i++) { - assertEquals(expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType())); - } - } - - @Test - public void test_getPrimitiveOrcTypeFromPrimitiveAvroType_badType() throws Exception { - Schema.Type nonPrimitiveType = Schema.Type.ARRAY; - assertThrows(IllegalArgumentException.class, () -> NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(nonPrimitiveType)); - } - - @Test - public void test_getWritable() throws Exception { - assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable); - assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable); - assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable); - assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable); - assertTrue(NiFiOrcUtils.convertToORCObject(null, BigDecimal.valueOf(1.0D)) instanceof HiveDecimalWritable); - assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}) instanceof List); - assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List); - Map map = new HashMap<>(); - map.put("Hello", 1.0f); - map.put("World", 2.0f); - - Object convMap = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map"), map); - assertTrue(convMap instanceof Map); - ((Map) convMap).forEach((key, value) -> { - assertTrue(key instanceof Text); - assertTrue(value instanceof FloatWritable); - }); - } - - @Test - public void test_getHiveTypeFromAvroType_primitive() throws Exception { - // Expected ORC types - String[] expectedTypes = { - "INT", - "BIGINT", - "BOOLEAN", - "FLOAT", - "DOUBLE", - "BINARY", - "STRING", - }; - - Schema testSchema = buildPrimitiveAvroSchema(); - List fields = testSchema.getFields(); - for (int i = 0; i < fields.size(); i++) { - assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema())); - } - } - - @Test - public void test_getHiveTypeFromAvroType_complex() throws Exception { - // Expected ORC types - String[] expectedTypes = { - "INT", - "MAP", - "STRING", - "UNIONTYPE", - "ARRAY", - "DECIMAL(10,2)" - }; - - Schema testSchema = buildComplexAvroSchema(); - List fields = testSchema.getFields(); - for (int i = 0; i < fields.size(); i++) { - assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema())); - } - - assertEquals("STRUCT, myEnum:STRING, myLongOrFloat:UNIONTYPE, myIntList:ARRAY, myDecimal:DECIMAL(10,2)>", - NiFiOrcUtils.getHiveTypeFromAvroType(testSchema)); - } - - @Test - public void test_generateHiveDDL_primitive() throws Exception { - Schema avroSchema = buildPrimitiveAvroSchema(); - String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable"); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)" - + " STORED AS ORC", ddl); - } - - @Test - public void test_generateHiveDDL_complex() throws Exception { - Schema avroSchema = buildComplexAvroSchema(); - String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable"); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable " - + "(myInt INT, myMap MAP, myEnum STRING, myLongOrFloat UNIONTYPE, myIntList ARRAY, myDecimal DECIMAL(10,2))" - + " STORED AS ORC", ddl); - } - - @Test - public void test_convertToORCObject() { - Schema schema = SchemaBuilder.enumeration("myEnum").symbols("x", "y", "z"); - List objects = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(schema, "x")); - objects.forEach((avroObject) -> { - Object o = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype"), avroObject); - assertTrue(o instanceof UnionObject); - UnionObject uo = (UnionObject) o; - assertTrue(uo.getObject() instanceof Text); - }); - } - - @Test - public void test_convertToORCObjectBadUnion() { - assertThrows(IllegalArgumentException.class, () -> NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype"), "Hello")); - } - - - ////////////////// - // Helper methods - ////////////////// - - public static Schema buildPrimitiveAvroSchema() { - // Build a fake Avro record with all primitive types - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("test.record").namespace("any.data").fields(); - builder.name("int").type().intType().noDefault(); - builder.name("long").type().longType().longDefault(1L); - builder.name("boolean").type().booleanType().booleanDefault(true); - builder.name("float").type().floatType().floatDefault(0.0f); - builder.name("double").type().doubleType().doubleDefault(0.0); - builder.name("bytes").type().bytesType().noDefault(); - builder.name("string").type().stringType().stringDefault("default"); - return builder.endRecord(); - } - - public static Schema buildAvroSchemaWithNull() { - // Build a fake Avro record which contains null - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("test.record").namespace("any.data").fields(); - builder.name("string").type().stringType().stringDefault("default"); - builder.name("null").type().nullType().noDefault(); - return builder.endRecord(); - } - - public static Schema buildAvroSchemaWithEmptyArray() { - // Build a fake Avro record which contains empty array - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("test.record").namespace("any.data").fields(); - builder.name("string").type().stringType().stringDefault("default"); - builder.name("emptyArray").type().array().items().nullType().noDefault(); - return builder.endRecord(); - } - - public static Schema buildAvroSchemaWithFixed() { - // Build a fake Avro record which contains null - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("test.record").namespace("any.data").fields(); - builder.name("fixed").type().fixed("fixedField").size(6).fixedDefault("123456"); - return builder.endRecord(); - } - - public static GenericData.Record buildPrimitiveAvroRecord(int i, long l, boolean b, float f, double d, ByteBuffer bytes, String string) { - Schema schema = buildPrimitiveAvroSchema(); - GenericData.Record row = new GenericData.Record(schema); - row.put("int", i); - row.put("long", l); - row.put("boolean", b); - row.put("float", f); - row.put("double", d); - row.put("bytes", bytes); - row.put("string", string); - return row; - } - - public static TypeInfo buildPrimitiveOrcSchema() { - return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"), - Arrays.asList( - TypeInfoCreator.createInt(), - TypeInfoCreator.createLong(), - TypeInfoCreator.createBoolean(), - TypeInfoCreator.createFloat(), - TypeInfoCreator.createDouble(), - TypeInfoCreator.createBinary(), - TypeInfoCreator.createString())); - } - - public static Schema buildComplexAvroSchema() { - // Build a fake Avro record with nested types - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("complex.record").namespace("any.data").fields(); - builder.name("myInt").type().unionOf().nullType().and().intType().endUnion().nullDefault(); - builder.name("myMap").type().map().values().doubleType().noDefault(); - builder.name("myEnum").type().enumeration("myEnum").symbols("ABC", "DEF", "XYZ").enumDefault("ABC"); - builder.name("myLongOrFloat").type().unionOf().longType().and().floatType().endUnion().noDefault(); - builder.name("myIntList").type().array().items().intType().noDefault(); - builder.name("myDecimal").type().bytesBuilder() - .prop("logicalType", "decimal") - .prop("precision", "10") - .prop("scale", "2") - .endBytes().noDefault(); - return builder.endRecord(); - } - - public static GenericData.Record buildComplexAvroRecord(Integer i, Map m, String e, Object unionVal, List intArray, ByteBuffer decimal) { - Schema schema = buildComplexAvroSchema(); - Schema enumSchema = schema.getField("myEnum").schema(); - GenericData.Record row = new GenericData.Record(schema); - row.put("myInt", i); - row.put("myMap", m); - row.put("myEnum", new GenericData.EnumSymbol(enumSchema, e)); - row.put("myLongOrFloat", unionVal); - row.put("myIntList", intArray); - row.put("myDecimal", decimal); - return row; - } - - public static GenericData.Record buildAvroRecordWithNull(String string) { - Schema schema = buildAvroSchemaWithNull(); - GenericData.Record row = new GenericData.Record(schema); - row.put("string", string); - row.put("null", null); - return row; - } - - public static GenericData.Record buildAvroRecordWithEmptyArray(String string) { - Schema schema = buildAvroSchemaWithEmptyArray(); - GenericData.Record row = new GenericData.Record(schema); - row.put("string", string); - row.put("emptyArray", Collections.emptyList()); - return row; - } - - public static GenericData.Record buildAvroRecordWithFixed(String string) { - Schema schema = buildAvroSchemaWithFixed(); - GenericData.Record row = new GenericData.Record(schema); - row.put("fixed", new GenericData.Fixed(schema, string.getBytes(StandardCharsets.UTF_8))); - return row; - } - - public static TypeInfo buildComplexOrcSchema() { - return TypeInfoUtils.getTypeInfoFromTypeString("struct,myEnum:string,myLongOrFloat:uniontype,myIntList:array,myDecimal:decimal(10,2)>"); - } - - public static Schema buildNestedComplexAvroSchema() { - // Build a fake Avro record with nested complex types - final SchemaBuilder.FieldAssembler builder = SchemaBuilder.record("nested.complex.record").namespace("any.data").fields(); - builder.name("myMapOfArray").type().map().values().array().items().doubleType().noDefault(); - builder.name("myArrayOfMap").type().array().items().map().values().stringType().noDefault(); - return builder.endRecord(); - } - - public static GenericData.Record buildNestedComplexAvroRecord(Map> m, List> a) { - Schema schema = buildNestedComplexAvroSchema(); - GenericData.Record row = new GenericData.Record(schema); - row.put("myMapOfArray", m); - row.put("myArrayOfMap", a); - return row; - } - - public static TypeInfo buildNestedComplexOrcSchema() { - return TypeInfoUtils.getTypeInfoFromTypeString("struct>,myArrayOfMap:array>>"); - } - - private static class TypeInfoCreator { - static TypeInfo createInt() { - return TypeInfoFactory.getPrimitiveTypeInfo("int"); - } - - static TypeInfo createLong() { - return TypeInfoFactory.getPrimitiveTypeInfo("bigint"); - } - - static TypeInfo createBoolean() { - return TypeInfoFactory.getPrimitiveTypeInfo("boolean"); - } - - static TypeInfo createFloat() { - return TypeInfoFactory.getPrimitiveTypeInfo("float"); - } - - static TypeInfo createDouble() { - return TypeInfoFactory.getPrimitiveTypeInfo("double"); - } - - static TypeInfo createBinary() { - return TypeInfoFactory.getPrimitiveTypeInfo("binary"); - } - - static TypeInfo createString() { - return TypeInfoFactory.getPrimitiveTypeInfo("string"); - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/array_of_records.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/array_of_records.avsc deleted file mode 100644 index 19cac6e60f..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/array_of_records.avsc +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - { - "namespace" : "org.apache.nifi", - "name" : "outer_record", - "type" : "record", - "fields" : [ { - "name" : "records", - "type" : { - "type" : "array", - "items" : { - "type" : "record", - "name" : "inner_record", - "fields" : [ { - "name" : "name", - "type" : "string" - }, { - "name" : "age", - "type" : "int" - } ] - } - } - } ] -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site-security.xml deleted file mode 100644 index eefc74ecbe..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site-security.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - - hadoop.security.authentication - kerberos - - - hadoop.security.authorization - true - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site.xml deleted file mode 100644 index 8f4a91a4ec..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/core-site.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/fake.keytab b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/fake.keytab deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site-security.xml deleted file mode 100644 index 4d64c951a4..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site-security.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - - hive.server2.authentication - KERBEROS - - - hadoop.security.authentication - kerberos - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site.xml deleted file mode 100644 index 7e7f86cf28..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/hive-site.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - fs.default.name - file:/// - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/krb5.conf b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/krb5.conf deleted file mode 100644 index 323da39be9..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/krb5.conf +++ /dev/null @@ -1,10 +0,0 @@ -[libdefaults] - default_realm = EXAMPLE.COM - dns_lookup_kdc = false - dns_lookup_realm = false - -[realms] - EXAMPLE.COM = { - kdc = kerberos.example.com - admin_server = kerberos.example.com - } \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/user.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/user.avsc deleted file mode 100644 index 95ef6e4fd0..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-processors/src/test/resources/user.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]}, - {"name": "scale", "type": ["double", "null"]} - ] -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1DBCPService.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1DBCPService.java deleted file mode 100644 index 09c05fbc3e..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1DBCPService.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.dbcp.hive; - - -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; - -/** - * Definition for Hive 1.1 Database Connection Pooling Service. - * - */ -@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"}) -@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.") -public interface Hive_1_1DBCPService extends HiveDBCPService { - public String getConnectionURL(); -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/pom.xml deleted file mode 100644 index 08ed65a266..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/pom.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - 4.0.0 - - - org.apache.nifi - nifi-hive-bundle - 2.0.0-SNAPSHOT - - - nifi-hive_1_1-nar - nar - - true - true - - ${hive11.hadoop.version} - - - - - org.apache.nifi - nifi-hive-services-api-nar - 2.0.0-SNAPSHOT - nar - - - org.apache.nifi - nifi-hive_1_1-processors - 2.0.0-SNAPSHOT - - - diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE deleted file mode 100644 index 78c5afab04..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE +++ /dev/null @@ -1,329 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -APACHE NIFI SUBCOMPONENTS: - -The Apache NiFi project contains subcomponents with separate copyright -notices and license terms. Your use of the source code for the these -subcomponents is subject to the terms and conditions of the following -licenses. - The binary distribution of this product bundles 'Bouncy Castle JDK 1.5' - under an MIT style license. - - Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org) - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - - The binary distribution of this product includes modules from Groovy which bundles ANTLR - SOFTWARE RIGHTS - - ANTLR 1989-2006 Developed by Terence Parr - Partially supported by University of San Francisco & jGuru.com - - We reserve no legal rights to the ANTLR--it is fully in the - public domain. An individual or company may do whatever - they wish with source code distributed with ANTLR or the - code generated by ANTLR, including the incorporation of - ANTLR, or its output, into commerical software. - - We encourage users to develop software with ANTLR. However, - we do ask that credit is given to us for developing - ANTLR. By "credit", we mean that if you use ANTLR or - incorporate any source code into one of your programs - (commercial product, research project, or otherwise) that - you acknowledge this fact somewhere in the documentation, - research report, etc... If you like ANTLR and have - developed a nice tool with the output, please mention that - you developed it using ANTLR. In addition, we ask that the - headers remain intact in our source code. As long as these - guidelines are kept, we expect to continue enhancing this - system and expect to make other tools available as they are - completed. - - The primary ANTLR guy: - - Terence Parr - parrt@cs.usfca.edu - parrt@antlr.org - - The binary distribution of this product includes modules from Groovy which bundles ASM - /*** - * http://asm.objectweb.org/ - * - * ASM: a very small and fast Java bytecode manipulation framework - * Copyright (c) 2000-2005 INRIA, France Telecom - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - - - The binary distribution of this product includes modules from Groovy which bundles source from JSR-223 - The following notice applies to the files: - - src/main/org/codehaus/groovy/jsr223/GroovyCompiledScript.java - src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineFactory.java - src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineImpl.java - - - /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: Redistributions of source code - * must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of - * conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. Neither the name of the Sun Microsystems nor the names of - * is contributors may be used to endorse or promote products derived from this software - * without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE deleted file mode 100644 index d09ab6996a..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE +++ /dev/null @@ -1,288 +0,0 @@ -nifi-hive_1_1-nar -Copyright 2014-2023 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm): - Copyright 2015 The Apache Software Foundation - The derived work is adapted from - org/apache/storm/hive/common/HiveWriter.java - org/apache/storm/hive/common/HiveOptions.java - and can be found in the org.apache.nifi.util.hive package - -=========================================== -Apache Software License v2 -=========================================== - -The following binary components are provided under the Apache Software License v2 - - (ASLv2) Apache Ant - The following NOTICE information applies: - Apache Ant - Copyright 1999-2016 The Apache Software Foundation - - (ASLv2) Apache Commons Codec - The following NOTICE information applies: - Apache Commons Codec - Copyright 2002-2014 The Apache Software Foundation - - src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java - contains test data from http://aspell.net/test/orig/batch0.tab. - Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) - - =============================================================================== - - The content of package org.apache.commons.codec.language.bm has been translated - from the original php source code available at http://stevemorse.org/phoneticinfo.htm - with permission from the original authors. - Original source copyright: - Copyright (c) 2008 Alexander Beider & Stephen P. Morse. - - (ASLv2) Apache Commons DBCP - The following NOTICE information applies: - Apache Commons DBCP - Copyright 2001-2015 The Apache Software Foundation. - - (ASLv2) Apache HttpComponents - The following NOTICE information applies: - Apache HttpComponents Client - Copyright 1999-2016 The Apache Software Foundation - Apache HttpComponents Core - HttpCore - Copyright 2006-2009 The Apache Software Foundation - - (ASLv2) Apache Commons Pool - The following NOTICE information applies: - Apache Commons Pool - Copyright 1999-2009 The Apache Software Foundation. - - (ASLv2) Apache Commons IO - The following NOTICE information applies: - Apache Commons IO - Copyright 2002-2016 The Apache Software Foundation - - (ASLv2) Apache Hive - The following NOTICE information applies: - Apache Hive - Copyright 2008-2015 The Apache Software Foundation - - This product includes software developed by The Apache Software - Foundation (http://www.apache.org/). - - This product includes Jersey (https://jersey.java.net/) - Copyright (c) 2010-2014 Oracle and/or its affiliates. - - This project includes software copyrighted by Microsoft Corporation and - licensed under the Apache License, Version 2.0. - - This project includes software copyrighted by Dell SecureWorks and - licensed under the Apache License, Version 2.0. - - (ASLv2) Jackson JSON processor - The following NOTICE information applies: - # Jackson JSON processor - - Jackson is a high-performance, Free/Open Source JSON processing library. - It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has - been in development since 2007. - It is currently developed by a community of developers, as well as supported - commercially by FasterXML.com. - - ## Licensing - - Jackson core and extension components may licensed under different licenses. - To find the details that apply to this artifact see the accompanying LICENSE file. - For more information, including possible other licensing options, contact - FasterXML.com (http://fasterxml.com). - - ## Credits - - A list of contributors may be found from CREDITS file, which is included - in some artifacts (usually source distributions); but is always available - from the source code management (SCM) system project uses. - - (ASLv2) BoneCP - The following NOTICE information applies: - BoneCP - Copyright 2010 Wallace Wadge - - (ASLv2) Apache Hadoop - The following NOTICE information applies: - The binary distribution of this product bundles binaries of - org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the - following notices: - * Copyright 2011 Dain Sundstrom - * Copyright 2011 FuseSource Corp. http://fusesource.com - - The binary distribution of this product bundles binaries of - org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), - which has the following notices: - * This product includes software developed by FuseSource Corp. - http://fusesource.com - * This product includes software developed at - Progress Software Corporation and/or its subsidiaries or affiliates. - * This product includes software developed by IBM Corporation and others. - - (ASLv2) Apache Commons Lang - The following NOTICE information applies: - Apache Commons Lang - Copyright 2001-2015 The Apache Software Foundation - - (ASLv2) Apache Curator - The following NOTICE information applies: - Apache Curator - Copyright 2013-2014 The Apache Software Foundation - - (ASLv2) Apache Derby - The following NOTICE information applies: - Apache Derby - Copyright 2004-2014 Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils, - the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation. - - (ASLv2) Apache DS - The following NOTICE information applies: - ApacheDS - Copyright 2003-2015 The Apache Software Foundation - - (ASLv2) Apache Geronimo - The following NOTICE information applies: - Apache Geronimo - Copyright 2003-2008 The Apache Software Foundation - - (ASLv2) HTrace Core - The following NOTICE information applies: - In addition, this product includes software dependencies. See - the accompanying LICENSE.txt for a listing of dependencies - that are NOT Apache licensed (with pointers to their licensing) - - Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin - is a distributed tracing system that is Apache 2.0 Licensed. - Copyright 2012 Twitter, Inc. - - (ASLv2) Jettison - The following NOTICE information applies: - Copyright 2006 Envoi Solutions LLC - - (ASLv2) Jetty - The following NOTICE information applies: - Jetty Web Container - Copyright 1995-2019 Mort Bay Consulting Pty Ltd. - - (ASLv2) Apache log4j - The following NOTICE information applies: - Apache log4j - Copyright 2007 The Apache Software Foundation - - (ASLv2) Parquet MR - The following NOTICE information applies: - Parquet MR - Copyright 2012 Twitter, Inc. - - This project includes code from https://github.com/lemire/JavaFastPFOR - parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java - Apache License Version 2.0 http://www.apache.org/licenses/. - (c) Daniel Lemire, http://lemire.me/en/ - - (ASLv2) Apache Thrift - The following NOTICE information applies: - Apache Thrift - Copyright 2006-2010 The Apache Software Foundation. - - (ASLv2) Apache Twill - The following NOTICE information applies: - Apache Twill - Copyright 2013-2016 The Apache Software Foundation - - (ASLv2) Dropwizard Metrics - The following NOTICE information applies: - Metrics - Copyright 2010-2013 Coda Hale and Yammer, Inc. - - This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, - LongAdder), which was released with the following comments: - - Written by Doug Lea with assistance from members of JCP JSR-166 - Expert Group and released to the public domain, as explained at - http://creativecommons.org/publicdomain/zero/1.0/ - - (ASLv2) Joda Time - The following NOTICE information applies: - This product includes software developed by - Joda.org (http://www.joda.org/). - - (ASLv2) The Netty Project - The following NOTICE information applies: - The Netty Project - Copyright 2011 The Netty Project - - (ASLv2) Apache Tomcat - The following NOTICE information applies: - Apache Tomcat - Copyright 2007 The Apache Software Foundation - - Java Management Extensions (JMX) support is provided by - the MX4J package, which is open source software. The - original software and related information is available - at http://mx4j.sourceforge.net. - - Java compilation software for JSP pages is provided by Eclipse, - which is open source software. The orginal software and - related infomation is available at - http://www.eclipse.org. - - (ASLv2) Apache ZooKeeper - The following NOTICE information applies: - Apache ZooKeeper - Copyright 2009-2012 The Apache Software Foundation - - (ASLv2) Google GSON - The following NOTICE information applies: - Copyright 2008 Google Inc. - - (ASLv2) Groovy (org.codehaus.groovy:groovy-all:jar:2.1.6 - http://www.groovy-lang.org) - The following NOTICE information applies: - Groovy Language - Copyright 2003-2012 The respective authors and developers - Developers and Contributors are listed in the project POM file - and Gradle build file - - This product includes software developed by - The Groovy community (http://groovy.codehaus.org/). - - (ASLv2) JPam - The following NOTICE information applies: - Copyright 2003-2006 Greg Luck - - ************************ - Common Development and Distribution License 1.1 - ************************ - - The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details. - - (CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net) - (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/) - (CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail) - - - ************************ - Common Development and Distribution License 1.0 - ************************ - - The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details. - - (CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available) - (CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided) - (CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp) - (CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net) - - ***************** - Public Domain - ***************** - - The following binary components are provided to the 'Public Domain'. See project link for details. - - (Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/) diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml deleted file mode 100644 index 7ca20310c7..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml +++ /dev/null @@ -1,225 +0,0 @@ - - - - 4.0.0 - - - org.apache.nifi - nifi-hive-bundle - 2.0.0-SNAPSHOT - - - nifi-hive_1_1-processors - jar - - - - ${hive11.hadoop.version} - - - - - org.apache.nifi - nifi-api - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-put-pattern - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-security-kerberos - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-dbcp-service-api - 2.0.0-SNAPSHOT - provided - - - org.apache.nifi - nifi-hive-services-api - 2.0.0-SNAPSHOT - provided - - - org.apache.nifi - nifi-kerberos-credentials-service-api - provided - - - - org.codehaus.groovy - groovy-all - 2.4.21 - - - org.apache.hive - hive-jdbc - ${hive11.version} - - - org.json - json - - - org.mockito - mockito-all - - - log4j - log4j - - - log4j - apache-log4j-extras - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hive.hcatalog - hive-hcatalog-streaming - ${hive11.version} - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - log4j - apache-log4j-extras - - - commons-logging - commons-logging - - - - - org.apache.hive.hcatalog - hive-hcatalog-core - ${hive11.version} - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - - - com.google.code.findbugs - jsr305 - - - log4j - log4j - - - log4j - apache-log4j-extras - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.nifi - nifi-hadoop-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-hadoop-record-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-record-serialization-service-api - - - org.apache.nifi - nifi-record - - - com.github.stephenc.findbugs - findbugs-annotations - 1.3.9-1 - - - org.apache.commons - commons-text - - - org.apache.commons - commons-dbcp2 - - - org.slf4j - log4j-over-slf4j - - - org.slf4j - jcl-over-slf4j - - - org.apache.nifi - nifi-mock - 2.0.0-SNAPSHOT - test - - - org.apache.nifi - nifi-mock-record-utils - 2.0.0-SNAPSHOT - test - - - diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java deleted file mode 100644 index 9ef9ed0fee..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.dbcp.hive; - -import org.apache.commons.dbcp2.BasicDataSource; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.jdbc.HiveDriver; -import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnDisabled; -import org.apache.nifi.annotation.lifecycle.OnEnabled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.PropertyValue; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.components.resource.ResourceCardinality; -import org.apache.nifi.components.resource.ResourceType; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.controller.ConfigurationContext; -import org.apache.nifi.controller.ControllerServiceInitializationContext; -import org.apache.nifi.dbcp.DBCPValidator; -import org.apache.nifi.expression.AttributeExpression; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.kerberos.KerberosCredentialsService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.security.krb.KerberosKeytabUser; -import org.apache.nifi.security.krb.KerberosLoginException; -import org.apache.nifi.security.krb.KerberosPasswordUser; -import org.apache.nifi.security.krb.KerberosUser; -import org.apache.nifi.util.hive.AuthenticationFailedException; -import org.apache.nifi.util.hive.HiveConfigurator; -import org.apache.nifi.util.hive.ValidationResources; - -import java.io.IOException; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.sql.Connection; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; - -/** - * Implementation for Database Connection Pooling Service used for Apache Hive 1.1 - * connections. Apache DBCP is used for connection pooling functionality. - */ -@RequiresInstanceClassLoading -@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"}) -@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.") -@DeprecationNotice(classNames = "org.apache.nifi.dbcp.hive.Hive3ConnectionPool") -public class Hive_1_1ConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService { - - private static final String DEFAULT_MAX_CONN_LIFETIME = "-1"; - - public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder() - .name("hive-db-connect-url") - .displayName("Database Connection URL") - .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters." - + " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included " - + "as a connection parameter when connecting to a secure Hive server.") - .defaultValue(null) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() - .name("hive-config-resources") - .displayName("Hive Configuration Resources") - .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop " - + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication " - + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.") - .required(false) - .identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder() - .name("hive-db-user") - .displayName("Database User") - .description("Database user name") - .defaultValue(null) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder() - .name("hive-db-password") - .displayName("Password") - .description("The password for the database user") - .defaultValue(null) - .required(false) - .sensitive(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder() - .name("hive-max-wait-time") - .displayName("Max Wait Time") - .description("The maximum amount of time that the pool will wait (when there are no available connections) " - + " for a connection to be returned before failing, or -1 to wait indefinitely. ") - .defaultValue("500 millis") - .required(true) - .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder() - .name("hive-max-total-connections") - .displayName("Max Total Connections") - .description("The maximum number of active connections that can be allocated from this pool at the same time, " - + "or negative for no limit.") - .defaultValue("8") - .required(true) - .addValidator(StandardValidators.INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor MAX_CONN_LIFETIME = new PropertyDescriptor.Builder() - .displayName("Max Connection Lifetime") - .name("hive-max-conn-lifetime") - .description("The maximum lifetime in milliseconds of a connection. After this time is exceeded the " + - "connection pool will invalidate the connection. A value of zero or -1 " + - "means the connection has an infinite lifetime.") - .defaultValue(DEFAULT_MAX_CONN_LIFETIME) - .required(true) - .addValidator(DBCPValidator.CUSTOM_TIME_PERIOD_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder() - .name("Validation-query") - .displayName("Validation query") - .description("Validation query used to validate connections before returning them. " - + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. " - + "NOTE: Using validation may have a performance penalty.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder() - .name("kerberos-credentials-service") - .displayName("Kerberos Credentials Service") - .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos") - .identifiesControllerService(KerberosCredentialsService.class) - .required(false) - .build(); - - static final PropertyDescriptor KERBEROS_PRINCIPAL = new PropertyDescriptor.Builder() - .name("kerberos-principal") - .displayName("Kerberos Principal") - .description("The principal to use when specifying the principal and password directly in the processor for authenticating via Kerberos.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING)) - .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) - .build(); - - static final PropertyDescriptor KERBEROS_PASSWORD = new PropertyDescriptor.Builder() - .name("kerberos-password") - .displayName("Kerberos Password") - .description("The password to use when specifying the principal and password directly in the processor for authenticating via Kerberos.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .sensitive(true) - .build(); - - - private List properties; - - private String connectionUrl = "unknown"; - - // Holder of cached Configuration information so validation does not reload the same config over and over - private final AtomicReference validationResourceHolder = new AtomicReference<>(); - - private volatile BasicDataSource dataSource; - - private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator(); - private volatile UserGroupInformation ugi; - private final AtomicReference kerberosUserReference = new AtomicReference<>(); - - @Override - protected void init(final ControllerServiceInitializationContext context) { - List props = new ArrayList<>(); - props.add(DATABASE_URL); - props.add(HIVE_CONFIGURATION_RESOURCES); - props.add(DB_USER); - props.add(DB_PASSWORD); - props.add(MAX_WAIT_TIME); - props.add(MAX_TOTAL_CONNECTIONS); - props.add(MAX_CONN_LIFETIME); - props.add(VALIDATION_QUERY); - props.add(KERBEROS_CREDENTIALS_SERVICE); - props.add(KERBEROS_PRINCIPAL); - props.add(KERBEROS_PASSWORD); - - properties = props; - } - - @Override - protected List getSupportedPropertyDescriptors() { - return properties; - } - - @Override - protected Collection customValidate(ValidationContext validationContext) { - boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet(); - - final List problems = new ArrayList<>(); - - if (confFileProvided) { - final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - final String explicitPrincipal = validationContext.getProperty(KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue(); - final String explicitPassword = validationContext.getProperty(KERBEROS_PASSWORD).getValue(); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService != null) { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } else { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = null; - } - - final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, explicitPassword, validationResourceHolder, getLogger())); - - if (credentialsService != null && (explicitPrincipal != null || explicitPassword != null)) { - problems.add(new ValidationResult.Builder() - .subject(KERBEROS_CREDENTIALS_SERVICE.getDisplayName()) - .valid(false) - .explanation("kerberos principal/password and kerberos credential service cannot be configured at the same time") - .build()); - } - } - - return problems; - } - - /** - * Configures connection pool by creating an instance of the - * {@link BasicDataSource} based on configuration provided with - * {@link ConfigurationContext}. - *

- * This operation makes no guarantees that the actual connection could be - * made since the underlying system may still go off-line during normal - * operation of the connection pool. - *

- * As of Apache NiFi 1.5.0, due to changes made to - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking - * {@link HiveConfigurator#authenticate(Configuration, String, String)} - * to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to - * relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from - * {@link Hive_1_1ConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent - * threads from requesting concurrent relogins. For more information, please read the documentation for - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. - *

- * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by - * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive - * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions - * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same - * {@link UserGroupInformation} instance. One of these threads could leave the - * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state - * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed - * authentication attempts that would leave the Hive controller service in an unrecoverable state. - * - * @see SecurityUtil#loginKerberos(Configuration, String, String) - * @see HiveConfigurator#authenticate(Configuration, String, String) - * @see HiveConfigurator#authenticate(Configuration, String, String, long) - * @param context the configuration context - * @throws InitializationException if unable to create a database connection - */ - @OnEnabled - public void onConfigured(final ConfigurationContext context) throws InitializationException { - - ComponentLog log = getLogger(); - - final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); - final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles); - final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue(); - - // add any dynamic properties to the Hive configuration - for (final Map.Entry entry : context.getProperties().entrySet()) { - final PropertyDescriptor descriptor = entry.getKey(); - if (descriptor.isDynamic()) { - hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue()); - } - } - - final String drv = HiveDriver.class.getName(); - if (SecurityUtil.isSecurityEnabled(hiveConfig)) { - final String explicitPrincipal = context.getProperty(KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue(); - final String explicitPassword = context.getProperty(KERBEROS_PASSWORD).getValue(); - final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); - - final String resolvedPrincipal; - final String resolvedKeytab; - if (credentialsService != null) { - resolvedPrincipal = credentialsService.getPrincipal(); - resolvedKeytab = credentialsService.getKeytab(); - } else { - resolvedPrincipal = explicitPrincipal; - resolvedKeytab = null; - } - - if (resolvedKeytab != null) { - kerberosUserReference.set(new KerberosKeytabUser(resolvedPrincipal, resolvedKeytab)); - log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab}); - } else if (explicitPassword != null) { - kerberosUserReference.set(new KerberosPasswordUser(resolvedPrincipal, explicitPassword)); - log.info("Hive Security Enabled, logging in as principal {} with password", new Object[] {resolvedPrincipal}); - } else { - throw new InitializationException("Unable to authenticate with Kerberos, no keytab or password was provided"); - } - - try { - ugi = hiveConfigurator.authenticate(hiveConfig, kerberosUserReference.get()); - } catch (AuthenticationFailedException ae) { - log.error(ae.getMessage(), ae); - throw new InitializationException(ae); - } - - getLogger().info("Successfully logged in as principal " + resolvedPrincipal); - } - - final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue(); - final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue(); - final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS); - final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger(); - final long maxConnectionLifetimeMillis = extractMillisWithInfinite(context.getProperty(MAX_CONN_LIFETIME).evaluateAttributeExpressions()); - - dataSource = new BasicDataSource(); - dataSource.setDriverClassName(drv); - - connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue(); - - dataSource.setMaxWaitMillis(maxWaitMillis); - dataSource.setMaxTotal(maxTotal); - dataSource.setMaxConnLifetimeMillis(maxConnectionLifetimeMillis); - - if (validationQuery != null && !validationQuery.isEmpty()) { - dataSource.setValidationQuery(validationQuery); - dataSource.setTestOnBorrow(true); - } - - dataSource.setUrl(connectionUrl); - dataSource.setUsername(user); - dataSource.setPassword(passw); - } - - /** - * Shutdown pool, close all open connections. - */ - @OnDisabled - public void shutdown() { - try { - if(dataSource != null) { - dataSource.close(); - } - } catch (final SQLException e) { - throw new ProcessException(e); - } - } - - @Override - public Connection getConnection() throws ProcessException { - try { - if (ugi != null) { - /* - * Explicitly check the TGT and relogin if necessary with the KerberosUser instance. No synchronization - * is necessary in the client code, since AbstractKerberosUser's checkTGTAndRelogin method is synchronized. - */ - getLogger().trace("getting UGI instance"); - if (kerberosUserReference.get() != null) { - // if there's a KerberosUser associated with this UGI, check the TGT and relogin if it is close to expiring - KerberosUser kerberosUser = kerberosUserReference.get(); - getLogger().debug("kerberosUser is " + kerberosUser); - try { - getLogger().debug("checking TGT on kerberosUser " + kerberosUser); - kerberosUser.checkTGTAndRelogin(); - } catch (final KerberosLoginException e) { - throw new ProcessException("Unable to relogin with kerberos credentials for " + kerberosUser.getPrincipal(), e); - } - } else { - getLogger().debug("kerberosUser was null, will not refresh TGT with KerberosUser"); - // no synchronization is needed for UserGroupInformation.checkTGTAndReloginFromKeytab; UGI handles the synchronization internally - ugi.checkTGTAndReloginFromKeytab(); - } - try { - return ugi.doAs((PrivilegedExceptionAction) () -> dataSource.getConnection()); - } catch (UndeclaredThrowableException e) { - Throwable cause = e.getCause(); - if (cause instanceof SQLException) { - throw (SQLException) cause; - } else { - throw e; - } - } - } else { - getLogger().info("Simple Authentication"); - return dataSource.getConnection(); - } - } catch (SQLException | IOException | InterruptedException e) { - getLogger().error("Error getting Hive connection", e); - throw new ProcessException(e); - } - } - - @Override - public String toString() { - return "HiveConnectionPool[id=" + getIdentifier() + "]"; - } - - @Override - public String getConnectionURL() { - return connectionUrl; - } - - private long extractMillisWithInfinite(PropertyValue prop) { - if (prop.getValue() == null || DEFAULT_MAX_CONN_LIFETIME.equals(prop.getValue())) { - return -1; - } else { - return prop.asTimePeriod(TimeUnit.MILLISECONDS); - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java deleted file mode 100644 index 14b207c572..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.antlr.runtime.tree.CommonTree; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.ParseDriver; -import org.apache.hadoop.hive.ql.parse.ParseException; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.AbstractSessionFactoryProcessor; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.io.InputStreamCallback; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.stream.io.StreamUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLDataException; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * An abstract base class for HiveQL processors to share common data, methods, etc. - */ -public abstract class AbstractHive_1_1QLProcessor extends AbstractSessionFactoryProcessor { - - protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type"); - protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+"); - static String ATTR_INPUT_TABLES = "query.input.tables"; - static String ATTR_OUTPUT_TABLES = "query.output.tables"; - - - public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder() - .name("Hive Database Connection Pooling Service") - .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database") - .required(true) - .identifiesControllerService(Hive_1_1DBCPService.class) - .build(); - - public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder() - .name("hive-charset") - .displayName("Character Set") - .description("Specifies the character set of the record data.") - .required(true) - .defaultValue("UTF-8") - .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) - .build(); - - /** - * Determines the HiveQL statement that should be executed for the given FlowFile - * - * @param session the session that can be used to access the given FlowFile - * @param flowFile the FlowFile whose HiveQL statement should be executed - * @return the HiveQL that is associated with the given FlowFile - */ - protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) { - // Read the HiveQL from the FlowFile's content - final byte[] buffer = new byte[(int) flowFile.getSize()]; - session.read(flowFile, new InputStreamCallback() { - @Override - public void process(final InputStream in) throws IOException { - StreamUtils.fillBuffer(in, buffer); - } - }); - - // Create the PreparedStatement to use for this FlowFile. - return new String(buffer, charset); - } - - private class ParameterHolder { - String attributeName; - int jdbcType; - String value; - } - - /** - * Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes. - * - * @param stmt the statement to set the parameters on - * @param attributes the attributes from which to derive parameter indices, values, and types - * @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called - */ - protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map attributes) throws SQLException { - - Map parmMap = new TreeMap(); - - for (final Map.Entry entry : attributes.entrySet()) { - final String key = entry.getKey(); - final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key); - if (matcher.matches()) { - final int parameterIndex = Integer.parseInt(matcher.group(1)); - if (parameterIndex >= base && parameterIndex < base + paramCount) { - final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches(); - if (!isNumeric) { - throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType"); - } - - final String valueAttrName = "hiveql.args." + parameterIndex + ".value"; - - ParameterHolder ph = new ParameterHolder(); - int realIndexLoc = parameterIndex - base +1; - - ph.jdbcType = Integer.parseInt(entry.getValue()); - ph.value = attributes.get(valueAttrName); - ph.attributeName = valueAttrName; - - parmMap.put(realIndexLoc, ph); - - } - } - } - - - // Now that's we've retrieved the correct number of parameters and it's sorted, let's set them. - for (final Map.Entry entry : parmMap.entrySet()) { - final Integer index = entry.getKey(); - final ParameterHolder ph = entry.getValue(); - - try { - setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType); - } catch (final NumberFormatException nfe) { - throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe); - } - } - return base + paramCount; - } - - /** - * Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the - * provided PreparedStatement - * - * @param stmt the PreparedStatement to set the parameter on - * @param attrName the name of the attribute that the parameter is coming from - for logging purposes - * @param parameterIndex the index of the HiveQL parameter to set - * @param parameterValue the value of the HiveQL parameter to set - * @param jdbcType the JDBC Type of the HiveQL parameter to set - * @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter - */ - protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException { - if (parameterValue == null) { - stmt.setNull(parameterIndex, jdbcType); - } else { - try { - switch (jdbcType) { - case Types.BIT: - case Types.BOOLEAN: - stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue)); - break; - case Types.TINYINT: - stmt.setByte(parameterIndex, Byte.parseByte(parameterValue)); - break; - case Types.SMALLINT: - stmt.setShort(parameterIndex, Short.parseShort(parameterValue)); - break; - case Types.INTEGER: - stmt.setInt(parameterIndex, Integer.parseInt(parameterValue)); - break; - case Types.BIGINT: - stmt.setLong(parameterIndex, Long.parseLong(parameterValue)); - break; - case Types.REAL: - stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue)); - break; - case Types.FLOAT: - case Types.DOUBLE: - stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue)); - break; - case Types.DECIMAL: - case Types.NUMERIC: - stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue)); - break; - case Types.DATE: - stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue))); - break; - case Types.TIME: - stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue))); - break; - case Types.TIMESTAMP: - stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue))); - break; - case Types.CHAR: - case Types.VARCHAR: - case Types.LONGNVARCHAR: - case Types.LONGVARCHAR: - stmt.setString(parameterIndex, parameterValue); - break; - default: - stmt.setObject(parameterIndex, parameterValue, jdbcType); - break; - } - } catch (SQLException e) { - // Log which attribute/parameter had an error, then rethrow to be handled at the top level - getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e); - throw e; - } - } - } - - protected static class TableName { - private final String database; - private final String table; - private final boolean input; - - TableName(String database, String table, boolean input) { - this.database = database; - this.table = table; - this.input = input; - } - - public String getDatabase() { - return database; - } - - public String getTable() { - return table; - } - - public boolean isInput() { - return input; - } - - @Override - public String toString() { - return database == null || database.isEmpty() ? table : database + '.' + table; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - TableName tableName = (TableName) o; - - if (input != tableName.input) return false; - if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false; - return table.equals(tableName.table); - } - - @Override - public int hashCode() { - int result = database != null ? database.hashCode() : 0; - result = 31 * result + table.hashCode(); - result = 31 * result + (input ? 1 : 0); - return result; - } - } - - protected Set findTableNames(final String query) { - final ASTNode node; - try { - node = new ParseDriver().parse(normalize(query)); - } catch (ParseException e) { - // If failed to parse the query, just log a message, but continue. - getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e); - return Collections.emptySet(); - } - - final HashSet tableNames = new HashSet<>(); - findTableNames(node, tableNames); - return tableNames; - } - - /** - * Normalize query. - * Hive resolves prepared statement parameters before executing a query, - * see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail. - * HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one. - * In this normalize method, '?' is replaced to 'x' to avoid that. - */ - private String normalize(String query) { - return query.replace('?', 'x'); - } - - private void findTableNames(final Object obj, final Set tableNames) { - if (!(obj instanceof CommonTree)) { - return; - } - final CommonTree tree = (CommonTree) obj; - final int childCount = tree.getChildCount(); - if ("TOK_TABNAME".equals(tree.getText())) { - final TableName tableName; - final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText()); - switch (childCount) { - case 1 : - tableName = new TableName(null, tree.getChild(0).getText(), isInput); - break; - case 2: - tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput); - break; - default: - throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount); - } - // If parent is TOK_TABREF, then it is an input table. - tableNames.add(tableName); - return; - } - for (int i = 0; i < childCount; i++) { - findTableNames(tree.getChild(i), tableNames); - } - } - - protected Map toQueryTableAttributes(Set tableNames) { - final Map attributes = new HashMap<>(); - for (TableName tableName : tableNames) { - final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES; - if (attributes.containsKey(attributeName)) { - attributes.put(attributeName, attributes.get(attributeName) + "," + tableName); - } else { - attributes.put(attributeName, tableName.toString()); - } - } - return attributes; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java deleted file mode 100644 index 05ac2eb667..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.lang3.StringUtils; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; -import org.apache.nifi.annotation.behavior.ReadsAttribute; -import org.apache.nifi.annotation.behavior.ReadsAttributes; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.SeeAlso; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.ErrorTypes; -import org.apache.nifi.processor.util.pattern.ExceptionHandler; -import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError; -import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles; -import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection; -import org.apache.nifi.processor.util.pattern.Put; -import org.apache.nifi.processor.util.pattern.RollbackOnFailure; -import org.apache.nifi.processor.util.pattern.RoutingResult; - -import java.nio.charset.Charset; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.SQLNonTransientException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.regex.Pattern; - -@SeeAlso(SelectHive_1_1QL.class) -@InputRequirement(Requirement.INPUT_REQUIRED) -@Tags({"sql", "hive", "put", "database", "update", "insert"}) -@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command " - + "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes " - + "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be " - + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.") -@ReadsAttributes({ - @ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer " - + "that represents the JDBC Type of the parameter."), - @ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as " - + "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.") -}) -@WritesAttributes({ - @WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " - + "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."), - @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " - + "and contains the target table names in 'databaseName.tableName' format.") -}) -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.PutHive3QL") -public class PutHive_1_1QL extends AbstractHive_1_1QLProcessor { - - public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder() - .name("hive-batch-size") - .displayName("Batch Size") - .description("The preferred number of FlowFiles to put to the database in a single transaction") - .required(true) - .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) - .defaultValue("100") - .build(); - - public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder() - .name("statement-delimiter") - .displayName("Statement Delimiter") - .description("Statement Delimiter used to separate SQL statements in a multiple statement script") - .required(true) - .defaultValue(";") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .build(); - - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile is routed to this relationship after the database is successfully updated") - .build(); - public static final Relationship REL_RETRY = new Relationship.Builder() - .name("retry") - .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed") - .build(); - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, " - + "such as an invalid query or an integrity constraint violation") - .build(); - - - private final static List propertyDescriptors; - private final static Set relationships; - - /* - * Will ensure that the list of property descriptors is built only once. - * Will also create a Set of relationships - */ - static { - List _propertyDescriptors = new ArrayList<>(); - _propertyDescriptors.add(HIVE_DBCP_SERVICE); - _propertyDescriptors.add(BATCH_SIZE); - _propertyDescriptors.add(CHARSET); - _propertyDescriptors.add(STATEMENT_DELIMITER); - _propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE); - propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - _relationships.add(REL_RETRY); - relationships = Collections.unmodifiableSet(_relationships); - } - - private Put process; - private ExceptionHandler exceptionHandler; - - @OnScheduled - public void constructProcess() { - exceptionHandler = new ExceptionHandler<>(); - exceptionHandler.mapException(e -> { - if (e instanceof SQLNonTransientException) { - return ErrorTypes.InvalidInput; - } else if (e instanceof SQLException) { - // Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes - int errorCode = ((SQLException) e).getErrorCode(); - getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode}); - if (errorCode >= 10000 && errorCode < 20000) { - return ErrorTypes.InvalidInput; - } else if (errorCode >= 20000 && errorCode < 30000) { - return ErrorTypes.InvalidInput; - } else if (errorCode >= 30000 && errorCode < 40000) { - return ErrorTypes.TemporalInputFailure; - } else if (errorCode >= 40000 && errorCode < 50000) { - // These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes - // a ProcessException, we'll route to failure via an InvalidInput error type. - return ErrorTypes.InvalidInput; - } else { - // Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure - // or rolled back depending on the user's setting of Rollback On Failure. - return ErrorTypes.TemporalFailure; - } - } else { - return ErrorTypes.UnknownFailure; - } - }); - exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger())); - - process = new Put<>(); - process.setLogger(getLogger()); - process.initConnection(initConnection); - process.fetchFlowFiles(fetchFlowFiles); - process.putFlowFile(putFlowFile); - process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY)); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - private class FunctionContext extends RollbackOnFailure { - final Charset charset; - final String statementDelimiter; - final long startNanos = System.nanoTime(); - - String connectionUrl; - - - private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) { - super(rollbackOnFailure, false); - this.charset = charset; - this.statementDelimiter = statementDelimiter; - } - } - - private InitConnection initConnection = (context, session, fc, ffs) -> { - final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class); - final Connection connection = dbcpService.getConnection(ffs == null || ffs.isEmpty() ? Collections.emptyMap() : ffs.get(0).getAttributes()); - fc.connectionUrl = dbcpService.getConnectionURL(); - return connection; - }; - - private FetchFlowFiles fetchFlowFiles = (context, session, functionContext, result) -> { - final int batchSize = context.getProperty(BATCH_SIZE).asInteger(); - return session.get(batchSize); - }; - - private Put.PutFlowFile putFlowFile = (context, session, fc, conn, flowFile, result) -> { - final String script = getHiveQL(session, flowFile, fc.charset); - String regex = "(? tableNames = new HashSet<>(); - exceptionHandler.execute(fc, flowFile, input -> { - int loc = 1; - for (String hiveQLStr: hiveQLs) { - getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr}); - - final String hiveQL = hiveQLStr.trim(); - if (!StringUtils.isEmpty(hiveQL)) { - try (final PreparedStatement stmt = conn.prepareStatement(hiveQL)) { - - // Get ParameterMetadata - // Hive JDBC Doesn't support this yet: - // ParameterMetaData pmd = stmt.getParameterMetaData(); - // int paramCount = pmd.getParameterCount(); - int paramCount = StringUtils.countMatches(hiveQL, "?"); - - if (paramCount > 0) { - loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes()); - } - - // Parse hiveQL and extract input/output tables - try { - tableNames.addAll(findTableNames(hiveQL)); - } catch (Exception e) { - // If failed to parse the query, just log a warning message, but continue. - getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e); - } - - // Execute the statement - stmt.execute(); - fc.proceed(); - } - } - } - - // Emit a Provenance SEND event - final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos); - - final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames)); - session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true); - result.routeTo(flowFile, REL_SUCCESS); - - }, onFlowFileError(context, session, result)); - - }; - - private OnError onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) { - OnError onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY); - onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> { - switch (r.destination()) { - case Failure: - getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e); - break; - case Retry: - getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry", - new Object[] {i, e}, e); - break; - case Self: - getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e); - break; - } - }); - return RollbackOnFailure.createOnError(onFlowFileError); - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean(); - final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); - final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue(); - final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter); - RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext)); - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java deleted file mode 100644 index af05500ffa..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java +++ /dev/null @@ -1,554 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.nifi.annotation.behavior.EventDriven; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.annotation.lifecycle.OnScheduled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.PartialFunctions; -import org.apache.nifi.util.StopWatch; -import org.apache.nifi.util.hive.CsvOutputOptions; -import org.apache.nifi.util.hive.HiveJdbcCommon; - -import java.nio.charset.Charset; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; -import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; -import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO; - -@EventDriven -@InputRequirement(Requirement.INPUT_ALLOWED) -@Tags({"hive", "sql", "select", "jdbc", "query", "database"}) -@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format." - + " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on " - + "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. " - + "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the " - + "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.") -@WritesAttributes({ - @WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."), - @WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."), - @WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."), - @WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set " - + "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."), - @WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of " - + "FlowFiles produced by a single ResultSet. This can be used in conjunction with the " - + "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."), - @WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of " - + "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be " - + "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order " - + "FlowFiles were produced"), - @WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.") -}) -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.SelectHive3QL") -public class SelectHive_1_1QL extends AbstractHive_1_1QLProcessor { - - public static final String RESULT_ROW_COUNT = "selecthiveql.row.count"; - - // Relationships - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("Successfully created FlowFile from HiveQL query result set.") - .build(); - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.") - .build(); - - - public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder() - .name("hive-pre-query") - .displayName("HiveQL Pre-Query") - .description("A semicolon-delimited list of queries executed before the main SQL query is executed. " - + "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. " - + "Note, the results/outputs of these queries will be suppressed if successfully executed.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder() - .name("hive-query") - .displayName("HiveQL Select Query") - .description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder() - .name("hive-post-query") - .displayName("HiveQL Post-Query") - .description("A semicolon-delimited list of queries executed after the main SQL query is executed. " - + "Note, the results/outputs of these queries will be suppressed if successfully executed.") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder() - .name("hive-fetch-size") - .displayName("Fetch Size") - .description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be " - + "honored and/or exact. If the value specified is zero, then the hint is ignored.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder() - .name("hive-max-rows") - .displayName("Max Rows Per Flow File") - .description("The maximum number of result rows that will be included in a single FlowFile. " + - "This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder() - .name("hive-max-frags") - .displayName("Maximum Number of Fragments") - .description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " + - "This prevents OutOfMemoryError when this processor ingests huge table.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder() - .name("csv-header") - .displayName("CSV Header") - .description("Include Header in Output") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder() - .name("csv-alt-header") - .displayName("Alternate CSV Header") - .description("Comma separated list of header fields") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder() - .name("csv-delimiter") - .displayName("CSV Delimiter") - .description("CSV Delimiter used to separate fields") - .required(true) - .defaultValue(",") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder() - .name("csv-quote") - .displayName("CSV Quote") - .description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder() - .name("csv-escape") - .displayName("CSV Escape") - .description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.") - .required(true) - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.BOOLEAN_VALIDATOR) - .build(); - - public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder() - .name("hive-output-format") - .displayName("Output Format") - .description("How to represent the records coming from Hive (Avro, CSV, e.g.)") - .required(true) - .allowableValues(AVRO, CSV) - .defaultValue(AVRO) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .build(); - - private final static List propertyDescriptors; - private final static Set relationships; - - /* - * Will ensure that the list of property descriptors is built only once. - * Will also create a Set of relationships - */ - static { - List _propertyDescriptors = new ArrayList<>(); - _propertyDescriptors.add(HIVE_DBCP_SERVICE); - _propertyDescriptors.add(HIVEQL_PRE_QUERY); - _propertyDescriptors.add(HIVEQL_SELECT_QUERY); - _propertyDescriptors.add(HIVEQL_POST_QUERY); - _propertyDescriptors.add(FETCH_SIZE); - _propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE); - _propertyDescriptors.add(MAX_FRAGMENTS); - _propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT); - _propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO); - _propertyDescriptors.add(HIVEQL_CSV_HEADER); - _propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER); - _propertyDescriptors.add(HIVEQL_CSV_DELIMITER); - _propertyDescriptors.add(HIVEQL_CSV_QUOTE); - _propertyDescriptors.add(HIVEQL_CSV_ESCAPE); - _propertyDescriptors.add(CHARSET); - propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - @OnScheduled - public void setup(ProcessContext context) { - // If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization - if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) { - final String errorString = "Either the Select Query must be specified or there must be an incoming connection " - + "providing flowfile(s) containing a SQL select query"; - getLogger().error(errorString); - throw new ProcessException(errorString); - } - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session)); - } - - private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { - FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); - FlowFile flowfile = null; - - // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. - // However, if we have no FlowFile and we have connections coming from other Processors, then - // we know that we should run only if we have a FlowFile. - if (context.hasIncomingConnection()) { - if (fileToProcess == null && context.hasNonLoopConnection()) { - return; - } - } - - final ComponentLog logger = getLogger(); - final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class); - final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); - - List preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); - List postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); - - final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); - - // Source the SQL - String hqlStatement; - - if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { - hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue(); - } else { - // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. - // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. - final StringBuilder queryContents = new StringBuilder(); - session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); - hqlStatement = queryContents.toString(); - } - - - final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger(); - final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger(); - final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() - ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() - : 0; - final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); - final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); - final StopWatch stopWatch = new StopWatch(true); - final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); - final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue(); - final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue(); - final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); - final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); - final String fragmentIdentifier = UUID.randomUUID().toString(); - - try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); - final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement()) - ) { - Pair failure = executeConfigStatements(con, preQueries); - if (failure != null) { - // In case of failure, assigning config query to "hqlStatement" to follow current error handling - hqlStatement = failure.getLeft(); - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw failure.getRight(); - } - if (fetchSize != null && fetchSize > 0) { - try { - st.setFetchSize(fetchSize); - } catch (SQLException se) { - // Not all drivers support this, just log the error (at debug level) and move on - logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se); - } - } - - final List resultSetFlowFiles = new ArrayList<>(); - try { - logger.debug("Executing query {}", new Object[]{hqlStatement}); - if (flowbased) { - // Hive JDBC Doesn't Support this yet: - // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); - // int paramCount = pmd.getParameterCount(); - - // Alternate way to determine number of params in SQL. - int paramCount = StringUtils.countMatches(hqlStatement, "?"); - - if (paramCount > 0) { - setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); - } - } - - final ResultSet resultSet; - - try { - resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); - } catch (SQLException se) { - // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw se; - } - - int fragmentIndex = 0; - String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; - while (true) { - final AtomicLong nrOfRows = new AtomicLong(0L); - flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); - if (baseFilename == null) { - baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); - } - try { - flowfile = session.write(flowfile, out -> { - try { - if (AVRO.equals(outputFormat)) { - nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); - } else if (CSV.equals(outputFormat)) { - CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); - nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); - } else { - nrOfRows.set(0L); - throw new ProcessException("Unsupported output format: " + outputFormat); - } - } catch (final SQLException | RuntimeException e) { - throw new ProcessException("Error during database query or conversion of records.", e); - } - }); - } catch (ProcessException e) { - // Add flowfile to results before rethrowing so it will be removed from session in outer catch - resultSetFlowFiles.add(flowfile); - throw e; - } - - if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { - final Map attributes = new HashMap<>(); - // Set attribute for how many rows were selected - attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); - - try { - // Set input/output table names by parsing the query - attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); - } catch (Exception e) { - // If failed to parse the query, just log a warning message, but continue. - getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e); - } - - // Set MIME type on output document and add extension to filename - if (AVRO.equals(outputFormat)) { - attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); - attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro"); - } else if (CSV.equals(outputFormat)) { - attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); - attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); - } - - if (maxRowsPerFlowFile > 0) { - attributes.put("fragment.identifier", fragmentIdentifier); - attributes.put("fragment.index", String.valueOf(fragmentIndex)); - } - - flowfile = session.putAllAttributes(flowfile, attributes); - - logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", - new Object[]{flowfile, nrOfRows.get()}); - - if (context.hasIncomingConnection()) { - // If the flow file came from an incoming connection, issue a Fetch provenance event - session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), - "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - } else { - // If we created a flow file from rows received from Hive, issue a Receive provenance event - session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - } - resultSetFlowFiles.add(flowfile); - } else { - // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on - session.remove(flowfile); - if (resultSetFlowFiles != null && resultSetFlowFiles.size()>0) { - flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size()-1); - } - break; - } - - fragmentIndex++; - if (maxFragments > 0 && fragmentIndex >= maxFragments) { - break; - } - } - - for (int i = 0; i < resultSetFlowFiles.size(); i++) { - // Set count on all FlowFiles - if (maxRowsPerFlowFile > 0) { - resultSetFlowFiles.set(i, - session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); - } - } - - } catch (final SQLException e) { - throw e; - } - - failure = executeConfigStatements(con, postQueries); - if (failure != null) { - hqlStatement = failure.getLeft(); - if (resultSetFlowFiles != null) { - resultSetFlowFiles.forEach(ff -> session.remove(ff)); - } - flowfile = (fileToProcess == null) ? session.create() : fileToProcess; - fileToProcess = null; - throw failure.getRight(); - } - - session.transfer(resultSetFlowFiles, REL_SUCCESS); - if (fileToProcess != null) { - session.remove(fileToProcess); - } - } catch (final ProcessException | SQLException e) { - logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e}); - if (flowfile == null) { - // This can happen if any exceptions occur while setting up the connection, statement, etc. - logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", - new Object[]{hqlStatement, e}); - context.yield(); - } else { - if (context.hasIncomingConnection()) { - logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", - new Object[]{hqlStatement, flowfile, e}); - flowfile = session.penalize(flowfile); - } else { - logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", - new Object[]{hqlStatement, e}); - context.yield(); - } - session.transfer(flowfile, REL_FAILURE); - } - } - } - - /* - * Executes given queries using pre-defined connection. - * Returns null on success, or a query string if failed. - */ - protected Pair executeConfigStatements(final Connection con, final List configQueries){ - if (configQueries == null || configQueries.isEmpty()) { - return null; - } - - for (String confSQL : configQueries) { - try(final Statement st = con.createStatement()){ - st.execute(confSQL); - } catch (SQLException e) { - return Pair.of(confSQL, e); - } - } - return null; - } - - protected List getQueries(final String value) { - if (value == null || value.length() == 0 || value.trim().length() == 0) { - return null; - } - final List queries = new LinkedList<>(); - for (String query : value.split(";")) { - if (query.trim().length() > 0) { - queries.add(query.trim()); - } - } - return queries; - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHive_1_1Table.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHive_1_1Table.java deleted file mode 100644 index bb580b1a69..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/UpdateHive_1_1Table.java +++ /dev/null @@ -1,853 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.ReadsAttribute; -import org.apache.nifi.annotation.behavior.ReadsAttributes; -import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.DeprecationNotice; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.components.Validator; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.AbstractProcessor; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.processor.util.pattern.DiscontinuedException; -import org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException; -import org.apache.nifi.serialization.MalformedRecordException; -import org.apache.nifi.serialization.RecordReader; -import org.apache.nifi.serialization.RecordReaderFactory; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriterFactory; -import org.apache.nifi.serialization.SimpleRecordSchema; -import org.apache.nifi.serialization.WriteResult; -import org.apache.nifi.serialization.record.DataType; -import org.apache.nifi.serialization.record.MapRecord; -import org.apache.nifi.serialization.record.Record; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordFieldType; -import org.apache.nifi.serialization.record.RecordSchema; -import org.apache.nifi.serialization.record.type.ArrayDataType; -import org.apache.nifi.serialization.record.type.ChoiceDataType; -import org.apache.nifi.serialization.record.type.MapDataType; -import org.apache.nifi.serialization.record.type.RecordDataType; -import org.apache.nifi.util.StringUtils; - -import java.io.IOException; -import java.io.InputStream; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; - - -@Tags({"hive", "metadata", "jdbc", "database", "table"}) -@CapabilityDescription("This processor uses a Hive JDBC connection and incoming records to generate any Hive 1.1 table changes needed to support the incoming records.") -@ReadsAttributes({ - @ReadsAttribute(attribute = "hive.table.management.strategy", description = "This attribute is read if the 'Table Management Strategy' property is configured " - + "to use the value of this attribute. The value of this attribute should correspond (ignoring case) to a valid option of the 'Table Management Strategy' property.") -}) -@WritesAttributes({ - @WritesAttribute(attribute = "output.table", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the target table name."), - @WritesAttribute(attribute = "output.path", description = "This attribute is written on the flow files routed to the 'success' " - + "and 'failure' relationships, and contains the path on the file system to the table (or partition location if the table is partitioned)."), - @WritesAttribute(attribute = "mime.type", description = "Sets the mime.type attribute to the MIME Type specified by the Record Writer, only if a Record Writer is specified " - + "and Update Field Names is 'true'."), - @WritesAttribute(attribute = "record.count", description = "Sets the number of records in the FlowFile, only if a Record Writer is specified and Update Field Names is 'true'.") -}) -@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) -@RequiresInstanceClassLoading -@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.UpdateHive3Table") -public class UpdateHive_1_1Table extends AbstractProcessor { - - static final String TEXTFILE = "TEXTFILE"; - static final String SEQUENCEFILE = "SEQUENCEFILE"; - static final String ORC = "ORC"; - static final String PARQUET = "PARQUET"; - static final String AVRO = "AVRO"; - static final String RCFILE = "RCFILE"; - - static final AllowableValue TEXTFILE_STORAGE = new AllowableValue(TEXTFILE, TEXTFILE, "Stored as plain text files. TEXTFILE is the default file format, unless the configuration " - + "parameter hive.default.fileformat has a different setting."); - static final AllowableValue SEQUENCEFILE_STORAGE = new AllowableValue(SEQUENCEFILE, SEQUENCEFILE, "Stored as compressed Sequence Files."); - static final AllowableValue ORC_STORAGE = new AllowableValue(ORC, ORC, "Stored as ORC file format. Supports ACID Transactions & Cost-based Optimizer (CBO). " - + "Stores column-level metadata."); - static final AllowableValue PARQUET_STORAGE = new AllowableValue(PARQUET, PARQUET, "Stored as Parquet format for the Parquet columnar storage format."); - static final AllowableValue AVRO_STORAGE = new AllowableValue(AVRO, AVRO, "Stored as Avro format."); - static final AllowableValue RCFILE_STORAGE = new AllowableValue(RCFILE, RCFILE, "Stored as Record Columnar File format."); - - static final AllowableValue CREATE_IF_NOT_EXISTS = new AllowableValue("Create If Not Exists", "Create If Not Exists", - "Create a table with the given schema if it does not already exist"); - static final AllowableValue FAIL_IF_NOT_EXISTS = new AllowableValue("Fail If Not Exists", "Fail If Not Exists", - "If the target does not already exist, log an error and route the flowfile to failure"); - - static final String TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE = "hive.table.management.strategy"; - static final AllowableValue MANAGED_TABLE = new AllowableValue("Managed", "Managed", - "Any tables created by this processor will be managed tables (see Hive documentation for details)."); - static final AllowableValue EXTERNAL_TABLE = new AllowableValue("External", "External", - "Any tables created by this processor will be external tables located at the `External Table Location` property value."); - static final AllowableValue ATTRIBUTE_DRIVEN_TABLE = new AllowableValue("Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute", - "Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute", - "Inspects the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' FlowFile attribute to determine the table management strategy. The value " - + "of this attribute must be a case-insensitive match to one of the other allowable values (Managed, External, e.g.)."); - - static final String ATTR_OUTPUT_TABLE = "output.table"; - static final String ATTR_OUTPUT_PATH = "output.path"; - - // Properties - static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder() - .name("record-reader") - .displayName("Record Reader") - .description("The service for reading incoming flow files. The reader is only used to determine the schema of the records, the actual records will not be processed.") - .identifiesControllerService(RecordReaderFactory.class) - .required(true) - .build(); - - static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder() - .name("hive11-dbcp-service") - .displayName("Hive Database Connection Pooling Service") - .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database") - .required(true) - .identifiesControllerService(Hive_1_1DBCPService.class) - .build(); - - static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder() - .name("hive11-table-name") - .displayName("Table Name") - .description("The name of the database table to update. If the table does not exist, then it will either be created or an error thrown, depending " - + "on the value of the Create Table property.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - static final PropertyDescriptor CREATE_TABLE = new PropertyDescriptor.Builder() - .name("hive11-create-table") - .displayName("Create Table Strategy") - .description("Specifies how to process the target table when it does not exist (create it, fail, e.g.).") - .required(true) - .addValidator(Validator.VALID) - .allowableValues(CREATE_IF_NOT_EXISTS, FAIL_IF_NOT_EXISTS) - .defaultValue(FAIL_IF_NOT_EXISTS.getValue()) - .build(); - - static final PropertyDescriptor TABLE_MANAGEMENT_STRATEGY = new PropertyDescriptor.Builder() - .name("hive11-create-table-management") - .displayName("Create Table Management Strategy") - .description("Specifies (when a table is to be created) whether the table is a managed table or an external table. Note that when External is specified, the " - + "'External Table Location' property must be specified. If the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' value is selected, 'External Table Location' " - + "must still be specified, but can contain Expression Language or be set to the empty string, and is ignored when the attribute evaluates to 'Managed'.") - .required(true) - .addValidator(Validator.VALID) - .allowableValues(MANAGED_TABLE, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE) - .defaultValue(MANAGED_TABLE.getValue()) - .dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS) - .build(); - - static final PropertyDescriptor UPDATE_FIELD_NAMES = new PropertyDescriptor.Builder() - .name("hive11-update-field-names") - .displayName("Update Field Names") - .description("This property indicates whether to update the output schema such that the field names are set to the exact column names from the specified " - + "table. This should be used if the incoming record field names may not match the table's column names in terms of upper- and lower-case. For example, this property should be " - + "set to true if the output FlowFile (and target table storage) is Avro format, as Hive/Impala expects the field names to match the column names exactly.") - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - static final PropertyDescriptor RECORD_WRITER_FACTORY = new PropertyDescriptor.Builder() - .name("hive11-record-writer") - .displayName("Record Writer") - .description("Specifies the Controller Service to use for writing results to a FlowFile. The Record Writer should use Inherit Schema to emulate the inferred schema behavior, i.e. " - + "an explicit schema need not be defined in the writer, and will be supplied by the same logic used to infer the schema from the column types. If Create Table Strategy is set " - + "'Create If Not Exists', the Record Writer's output format must match the Record Reader's format in order for the data to be placed in the created table location. Note that " - + "this property is only used if 'Update Field Names' is set to true and the field names do not all match the column names exactly. If no " - + "update is needed for any field names (or 'Update Field Names' is false), the Record Writer is not used and instead the input FlowFile is routed to success or failure " - + "without modification.") - .identifiesControllerService(RecordSetWriterFactory.class) - .dependsOn(UPDATE_FIELD_NAMES, "true") - .required(true) - .build(); - - static final PropertyDescriptor EXTERNAL_TABLE_LOCATION = new PropertyDescriptor.Builder() - .name("hive11-external-table-location") - .displayName("External Table Location") - .description("Specifies (when an external table is to be created) the file path (in HDFS, e.g.) to store table data.") - .required(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR) - .dependsOn(TABLE_MANAGEMENT_STRATEGY, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE) - .build(); - - static final PropertyDescriptor TABLE_STORAGE_FORMAT = new PropertyDescriptor.Builder() - .name("hive11-storage-format") - .displayName("Create Table Storage Format") - .description("If a table is to be created, the specified storage format will be used.") - .required(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .allowableValues(TEXTFILE_STORAGE, SEQUENCEFILE_STORAGE, ORC_STORAGE, PARQUET_STORAGE, AVRO_STORAGE, RCFILE_STORAGE) - .defaultValue(TEXTFILE) - .dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS) - .build(); - - static final PropertyDescriptor QUERY_TIMEOUT = new PropertyDescriptor.Builder() - .name("hive11query-timeout") - .displayName("Query Timeout") - .description("Sets the number of seconds the driver will wait for a query to execute. " - + "A value of 0 means no timeout. NOTE: Non-zero values may not be supported by the driver.") - .defaultValue("0") - .required(true) - .addValidator(StandardValidators.INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - static final PropertyDescriptor PARTITION_CLAUSE = new PropertyDescriptor.Builder() - .name("hive11-partition-clause") - .displayName("Partition Clause") - .description("Specifies a comma-separated list of attribute names and optional data types corresponding to the partition columns of the target table. Simply put, if the table is " - + "partitioned or is to be created with partitions, each partition name should be an attribute on the FlowFile and listed in this property. This assumes all incoming records " - + "belong to the same partition and the partition columns are not fields in the record. An example of specifying this field is if PartitionRecord " - + "is upstream and two partition columns 'name' (of type string) and 'age' (of type integer) are used, then this property can be set to 'name string, age int'. The data types " - + "are optional and if partition(s) are to be created they will default to string type if not specified. For non-string primitive types, specifying the data type for existing " - + "partition columns is helpful for interpreting the partition value(s). If the table exists, the data types need not be specified " - + "(and are ignored in that case). This property must be set if the table is partitioned, and there must be an attribute for each partition column in the table. " - + "The values of the attributes will be used as the partition values, and the resulting output.path attribute value will reflect the location of the partition in the filesystem " - + "(for use downstream in processors such as PutHDFS).") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - // Relationships - public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("A FlowFile containing records routed to this relationship after the record has been successfully transmitted to Hive.") - .build(); - - public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("A FlowFile containing records routed to this relationship if the record could not be transmitted to Hive.") - .build(); - - private List propertyDescriptors; - private Set relationships; - - @Override - protected void init(ProcessorInitializationContext context) { - List props = new ArrayList<>(); - props.add(RECORD_READER); - props.add(HIVE_DBCP_SERVICE); - props.add(TABLE_NAME); - props.add(PARTITION_CLAUSE); - props.add(CREATE_TABLE); - props.add(TABLE_MANAGEMENT_STRATEGY); - props.add(EXTERNAL_TABLE_LOCATION); - props.add(TABLE_STORAGE_FORMAT); - props.add(UPDATE_FIELD_NAMES); - props.add(RECORD_WRITER_FACTORY); - props.add(QUERY_TIMEOUT); - - propertyDescriptors = Collections.unmodifiableList(props); - - Set _relationships = new HashSet<>(); - _relationships.add(REL_SUCCESS); - _relationships.add(REL_FAILURE); - relationships = Collections.unmodifiableSet(_relationships); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return propertyDescriptors; - } - - @Override - public Set getRelationships() { - return relationships; - } - - @Override - protected Collection customValidate(ValidationContext validationContext) { - List validationResults = new ArrayList<>(super.customValidate(validationContext)); - final boolean recordWriterFactorySet = validationContext.getProperty(RECORD_WRITER_FACTORY).isSet(); - final boolean createIfNotExists = validationContext.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue()); - final boolean updateFieldNames = validationContext.getProperty(UPDATE_FIELD_NAMES).asBoolean(); - - if (!recordWriterFactorySet && updateFieldNames) { - validationResults.add(new ValidationResult.Builder().subject(RECORD_WRITER_FACTORY.getDisplayName()) - .explanation("Record Writer must be set if 'Update Field Names' is true").valid(false).build()); - } - final String tableManagementStrategy = validationContext.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue(); - final boolean managedTable; - if (!ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) { - managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy); - // Ensure valid configuration for external tables - if (createIfNotExists && !managedTable && !validationContext.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) { - validationResults.add(new ValidationResult.Builder().subject(EXTERNAL_TABLE_LOCATION.getDisplayName()) - .explanation("External Table Location must be set when Table Management Strategy is set to External").valid(false).build()); - } - } - return validationResults; - } - - @Override - public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { - - FlowFile flowFile = session.get(); - if (flowFile == null) { - return; - } - - final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class); - final RecordSetWriterFactory recordWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class); - final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue(); - final String partitionClauseString = context.getProperty(PARTITION_CLAUSE).evaluateAttributeExpressions(flowFile).getValue(); - List partitionClauseElements = null; - if (!StringUtils.isEmpty(partitionClauseString)) { - partitionClauseElements = Arrays.stream(partitionClauseString.split(",")).filter(Objects::nonNull).map(String::trim).collect(Collectors.toList()); - } - - final ComponentLog log = getLogger(); - - try { - final RecordReader reader; - - try (final InputStream in = session.read(flowFile)) { - // if we fail to create the RecordReader then we want to route to failure, so we need to - // handle this separately from the other IOExceptions which normally route to retry - try { - reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger()); - } catch (Exception e) { - throw new RecordReaderFactoryException("Unable to create RecordReader", e); - } - } catch (RecordReaderFactoryException rrfe) { - log.error( - "Failed to create {} for {} - routing to failure", - new Object[]{RecordReader.class.getSimpleName(), flowFile}, - rrfe - ); - // Since we are wrapping the exceptions above there should always be a cause - // but it's possible it might not have a message. This handles that by logging - // the name of the class thrown. - Throwable c = rrfe.getCause(); - if (c != null) { - session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown"); - } else { - session.putAttribute(flowFile, "record.error.message", rrfe.getClass().getCanonicalName() + " Thrown"); - } - session.transfer(flowFile, REL_FAILURE); - return; - } - - final RecordSchema recordSchema = reader.getSchema(); - - final boolean createIfNotExists = context.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue()); - final boolean updateFieldNames = context.getProperty(UPDATE_FIELD_NAMES).asBoolean(); - if (recordWriterFactory == null && updateFieldNames) { - throw new ProcessException("Record Writer must be set if 'Update Field Names' is true"); - } - final String tableManagementStrategy = context.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue(); - final boolean managedTable; - if (ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) { - String tableManagementStrategyAttribute = flowFile.getAttribute(TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE); - if (MANAGED_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) { - managedTable = true; - } else if (EXTERNAL_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) { - managedTable = false; - } else { - log.error("The '{}' attribute either does not exist or has invalid value: {}. Must be one of (ignoring case): Managed, External. " - + "Routing flowfile to failure", - new Object[]{TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE, tableManagementStrategyAttribute}); - session.transfer(flowFile, REL_FAILURE); - return; - } - } else { - managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy); - } - - // Ensure valid configuration for external tables - if (createIfNotExists && !managedTable && !context.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) { - throw new IOException("External Table Location must be set when Table Management Strategy is set to External"); - } - final String externalTableLocation = managedTable ? null : context.getProperty(EXTERNAL_TABLE_LOCATION).evaluateAttributeExpressions(flowFile).getValue(); - if (!managedTable && StringUtils.isEmpty(externalTableLocation)) { - log.error("External Table Location has invalid value: {}. Routing flowfile to failure", new Object[]{externalTableLocation}); - session.transfer(flowFile, REL_FAILURE); - return; - } - final String storageFormat = context.getProperty(TABLE_STORAGE_FORMAT).getValue(); - final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class); - try (final Connection connection = dbcpService.getConnection()) { - final Map attributes = new HashMap<>(flowFile.getAttributes()); - OutputMetadataHolder outputMetadataHolder = checkAndUpdateTableSchema(attributes, connection, recordSchema, tableName, partitionClauseElements, - createIfNotExists, externalTableLocation, storageFormat, updateFieldNames); - if (outputMetadataHolder != null) { - // The output schema changed (i.e. field names were updated), so write out the corresponding FlowFile - try { - final FlowFile inputFlowFile = flowFile; - flowFile = session.write(flowFile, (in, out) -> { - - // if we fail to create the RecordReader then we want to route to failure, so we need to - // handle this separately from the other IOExceptions which normally route to retry - final RecordReader recordReader; - final RecordSetWriter recordSetWriter; - try { - recordReader = recordReaderFactory.createRecordReader(inputFlowFile, in, getLogger()); - recordSetWriter = recordWriterFactory.createWriter(getLogger(), outputMetadataHolder.getOutputSchema(), out, attributes); - } catch (Exception e) { - if(e instanceof IOException) { - throw (IOException) e; - } - throw new IOException(new RecordReaderFactoryException("Unable to create RecordReader", e)); - } - - WriteResult writeResult = updateRecords(recordSchema, outputMetadataHolder, recordReader, recordSetWriter); - recordSetWriter.flush(); - recordSetWriter.close(); - attributes.put("record.count", String.valueOf(writeResult.getRecordCount())); - attributes.put(CoreAttributes.MIME_TYPE.key(), recordSetWriter.getMimeType()); - attributes.putAll(writeResult.getAttributes()); - }); - } catch (final Exception e) { - getLogger().error("Failed to process {}; will route to failure", new Object[]{flowFile, e}); - // Since we are wrapping the exceptions above there should always be a cause - // but it's possible it might not have a message. This handles that by logging - // the name of the class thrown. - Throwable c = e.getCause(); - if (c != null) { - session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown"); - } else { - session.putAttribute(flowFile, "record.error.message", e.getClass().getCanonicalName() + " Thrown"); - } - session.transfer(flowFile, REL_FAILURE); - return; - } - - } - attributes.put(ATTR_OUTPUT_TABLE, tableName); - flowFile = session.putAllAttributes(flowFile, attributes); - session.getProvenanceReporter().invokeRemoteProcess(flowFile, dbcpService.getConnectionURL()); - session.transfer(flowFile, REL_SUCCESS); - } - } catch (IOException | SQLException e) { - - flowFile = session.putAttribute(flowFile, ATTR_OUTPUT_TABLE, tableName); - log.error("Exception while processing {} - routing to failure", new Object[]{flowFile}, e); - session.transfer(flowFile, REL_FAILURE); - - } catch (DiscontinuedException e) { - // The input FlowFile processing is discontinued. Keep it in the input queue. - getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e); - session.transfer(flowFile, Relationship.SELF); - } catch (Throwable t) { - throw (t instanceof ProcessException) ? (ProcessException) t : new ProcessException(t); - } - } - - private synchronized OutputMetadataHolder checkAndUpdateTableSchema(Map attributes, final Connection conn, final RecordSchema schema, - final String tableName, List partitionClause, final boolean createIfNotExists, - final String externalTableLocation, final String storageFormat, final boolean updateFieldNames) throws IOException { - // Read in the current table metadata, compare it to the reader's schema, and - // add any columns from the schema that are missing in the table - try (Statement s = conn.createStatement()) { - // Determine whether the table exists - ResultSet tables = s.executeQuery("SHOW TABLES"); - List tableNames = new ArrayList<>(); - String hiveTableName; - while (tables.next() && StringUtils.isNotEmpty(hiveTableName = tables.getString(1))) { - tableNames.add(hiveTableName); - } - - List columnsToAdd = new ArrayList<>(); - String outputPath; - boolean tableCreated = false; - if (!tableNames.contains(tableName) && createIfNotExists) { - StringBuilder createTableStatement = new StringBuilder(); - for (RecordField recordField : schema.getFields()) { - String recordFieldName = recordField.getFieldName(); - // The field does not exist in the table, add it - columnsToAdd.add("`" + recordFieldName + "` " + getHiveTypeFromFieldType(recordField.getDataType(), true)); - getLogger().debug("Adding column " + recordFieldName + " to table " + tableName); - } - - // Handle partition clause - if (partitionClause == null) { - partitionClause = Collections.emptyList(); - } - List validatedPartitionClause = new ArrayList<>(partitionClause.size()); - for (String partition : partitionClause) { - String[] partitionInfo = partition.split(" "); - if (partitionInfo.length != 2) { - validatedPartitionClause.add("`" + partitionInfo[0] + "` string"); - } else { - validatedPartitionClause.add("`" + partitionInfo[0] + "` " + partitionInfo[1]); - } - } - - createTableStatement.append("CREATE ") - .append(externalTableLocation == null ? "" : "EXTERNAL ") - .append("TABLE IF NOT EXISTS `") - .append(tableName) - .append("` (") - .append(String.join(", ", columnsToAdd)) - .append(") ") - .append(validatedPartitionClause.isEmpty() ? "" : "PARTITIONED BY (" + String.join(", ", validatedPartitionClause) + ") ") - .append("STORED AS ") - .append(storageFormat) - .append(externalTableLocation == null ? "" : " LOCATION '" + externalTableLocation + "'"); - - String createTableSql = createTableStatement.toString(); - - if (StringUtils.isNotEmpty(createTableSql)) { - // Perform the table create - getLogger().info("Executing Hive DDL: " + createTableSql); - s.execute(createTableSql); - } - - tableCreated = true; - } - - // Process the table (columns, partitions, location, etc.) - List hiveColumns = new ArrayList<>(); - - String describeTable = "DESC FORMATTED `" + tableName + "`"; - ResultSet tableInfo = s.executeQuery(describeTable); - // Result is 3 columns, col_name, data_type, comment. Check the first row for a header and skip if so, otherwise add column name - tableInfo.next(); - String columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) { - hiveColumns.add(columnName); - } - // If the column was a header, check for a blank line to follow and skip it, otherwise add the column name - if (columnName.startsWith("#")) { - tableInfo.next(); - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName)) { - hiveColumns.add(columnName); - } - } - - // Collect all column names - while (tableInfo.next() && StringUtils.isNotEmpty(columnName = tableInfo.getString(1))) { - hiveColumns.add(columnName); - } - - // Collect all partition columns - boolean moreRows = true; - boolean headerFound = false; - while (moreRows && !headerFound) { - String line = tableInfo.getString(1); - if ("# Partition Information".equals(line)) { - headerFound = true; - } else if ("# Detailed Table Information".equals(line)) { - // Not partitioned, exit the loop with headerFound = false - break; - } - moreRows = tableInfo.next(); - } - - List partitionColumns = new ArrayList<>(); - List partitionColumnsEqualsValueList = new ArrayList<>(); - List partitionColumnsLocationList = new ArrayList<>(); - if (headerFound) { - // If the table is partitioned, construct the partition=value strings for each partition column - String partitionColumnName; - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) { - partitionColumns.add(columnName); - } - // If the column was a header, check for a blank line to follow and skip it, otherwise add the column name - if (columnName.startsWith("#")) { - tableInfo.next(); - columnName = tableInfo.getString(1); - if (StringUtils.isNotEmpty(columnName)) { - partitionColumns.add(columnName); - } - } - while (tableInfo.next() && StringUtils.isNotEmpty(partitionColumnName = tableInfo.getString(1))) { - partitionColumns.add(partitionColumnName); - } - - final int partitionColumnsSize = partitionColumns.size(); - final int partitionClauseSize = (partitionClause == null) ? 0 : partitionClause.size(); - if (partitionClauseSize != partitionColumnsSize) { - throw new IOException("Found " + partitionColumnsSize + " partition columns but " + partitionClauseSize + " partition values were supplied"); - } - - for (int i = 0; i < partitionClauseSize; i++) { - String partitionName = partitionClause.get(i).split(" ")[0]; - String partitionValue = attributes.get(partitionName); - if (StringUtils.isEmpty(partitionValue)) { - throw new IOException("No value found for partition value attribute '" + partitionName + "'"); - } - if (!partitionColumns.contains(partitionName)) { - throw new IOException("Cannot add partition '" + partitionName + "' to existing table"); - } - partitionColumnsEqualsValueList.add("`" + partitionName + "`='" + partitionValue + "'"); - // Add unquoted version for the output path - partitionColumnsLocationList.add(partitionName + "=" + partitionValue); - } - } - - // Get table location - moreRows = true; - headerFound = false; - while (moreRows && !headerFound) { - String line = tableInfo.getString(1); - if (line.startsWith("Location:")) { - headerFound = true; - continue; // Don't do a next() here, need to get the second column value - } - moreRows = tableInfo.next(); - } - String tableLocation = tableInfo.getString(2); - - String alterTableSql; - // If the table wasn't newly created, alter it accordingly - if (!tableCreated) { - StringBuilder alterTableStatement = new StringBuilder(); - // Handle new columns - for (RecordField recordField : schema.getFields()) { - String recordFieldName = recordField.getFieldName().toLowerCase(); - if (!hiveColumns.contains(recordFieldName) && !partitionColumns.contains(recordFieldName)) { - // The field does not exist in the table (and is not a partition column), add it - columnsToAdd.add("`" + recordFieldName + "` " + getHiveTypeFromFieldType(recordField.getDataType(), true)); - getLogger().info("Adding column " + recordFieldName + " to table " + tableName); - } - } - - if (!columnsToAdd.isEmpty()) { - alterTableStatement.append("ALTER TABLE `") - .append(tableName) - .append("` ADD COLUMNS (") - .append(String.join(", ", columnsToAdd)) - .append(")"); - - alterTableSql = alterTableStatement.toString(); - if (StringUtils.isNotEmpty(alterTableSql)) { - // Perform the table update - getLogger().info("Executing Hive DDL: " + alterTableSql); - s.execute(alterTableSql); - } - } - } - - outputPath = tableLocation; - - // Handle new partition values - if (!partitionColumnsEqualsValueList.isEmpty()) { - alterTableSql = "ALTER TABLE `" + - tableName + - "` ADD IF NOT EXISTS PARTITION (" + - String.join(", ", partitionColumnsEqualsValueList) + - ")"; - if (StringUtils.isNotEmpty(alterTableSql)) { - // Perform the table update - getLogger().info("Executing Hive DDL: " + alterTableSql); - s.execute(alterTableSql); - } - // Add attribute for HDFS location of the partition values - outputPath = tableLocation + "/" + String.join("/", partitionColumnsLocationList); - } - - // If updating field names, return a new RecordSchema, otherwise return null - OutputMetadataHolder outputMetadataHolder; - if (updateFieldNames) { - List inputRecordFields = schema.getFields(); - List outputRecordFields = new ArrayList<>(); - Map fieldMap = new HashMap<>(); - boolean needsUpdating = false; - - for (RecordField inputRecordField : inputRecordFields) { - final String inputRecordFieldName = inputRecordField.getFieldName(); - boolean found = false; - for (String hiveColumnName : hiveColumns) { - if (inputRecordFieldName.equalsIgnoreCase(hiveColumnName)) { - // Set a flag if the field name doesn't match the column name exactly. This overall flag will determine whether - // the records need updating (if true) or not (if false) - if (!inputRecordFieldName.equals(hiveColumnName)) { - needsUpdating = true; - } - fieldMap.put(inputRecordFieldName, hiveColumnName); - outputRecordFields.add(new RecordField(hiveColumnName, inputRecordField.getDataType(), inputRecordField.getDefaultValue(), inputRecordField.isNullable())); - found = true; - break; - } - } - if (!found) { - // If the input field wasn't a Hive table column, add it back to the schema as-is - fieldMap.put(inputRecordFieldName, inputRecordFieldName); - } - } - outputMetadataHolder = needsUpdating ? new OutputMetadataHolder(new SimpleRecordSchema(outputRecordFields), fieldMap) - : null; - } else { - outputMetadataHolder = null; - } - attributes.put(ATTR_OUTPUT_PATH, outputPath); - return outputMetadataHolder; - } catch (Exception e) { - throw new IOException(e); - } - } - - public static String getHiveTypeFromFieldType(DataType rawDataType, boolean hiveFieldNames) { - if (rawDataType == null) { - throw new IllegalArgumentException("Field type is null"); - } - RecordFieldType dataType = rawDataType.getFieldType(); - - if (RecordFieldType.INT.equals(dataType)) { - return "INT"; - } - if (RecordFieldType.LONG.equals(dataType)) { - return "BIGINT"; - } - if (RecordFieldType.BOOLEAN.equals(dataType)) { - return "BOOLEAN"; - } - if (RecordFieldType.DOUBLE.equals(dataType)) { - return "DOUBLE"; - } - if (RecordFieldType.FLOAT.equals(dataType)) { - return "FLOAT"; - } - if (RecordFieldType.DECIMAL.equals(dataType)) { - return "DECIMAL"; - } - if (RecordFieldType.STRING.equals(dataType) || RecordFieldType.ENUM.equals(dataType)) { - return "STRING"; - } - if (RecordFieldType.DATE.equals(dataType)) { - return "DATE"; - } - if (RecordFieldType.TIME.equals(dataType)) { - return "INT"; - } - if (RecordFieldType.TIMESTAMP.equals(dataType)) { - return "TIMESTAMP"; - } - if (RecordFieldType.ARRAY.equals(dataType)) { - ArrayDataType arrayDataType = (ArrayDataType) rawDataType; - if (RecordFieldType.BYTE.getDataType().equals(arrayDataType.getElementType())) { - return "BINARY"; - } - return "ARRAY<" + getHiveTypeFromFieldType(arrayDataType.getElementType(), hiveFieldNames) + ">"; - } - if (RecordFieldType.MAP.equals(dataType)) { - MapDataType mapDataType = (MapDataType) rawDataType; - return "MAP"; - } - if (RecordFieldType.CHOICE.equals(dataType)) { - ChoiceDataType choiceDataType = (ChoiceDataType) rawDataType; - List unionFieldSchemas = choiceDataType.getPossibleSubTypes(); - - if (unionFieldSchemas != null) { - // Ignore null types in union - List hiveFields = unionFieldSchemas.stream() - .map((it) -> getHiveTypeFromFieldType(it, hiveFieldNames)) - .collect(Collectors.toList()); - - // Flatten the field if the union only has one non-null element - return (hiveFields.size() == 1) - ? hiveFields.get(0) - : "UNIONTYPE<" + org.apache.commons.lang3.StringUtils.join(hiveFields, ", ") + ">"; - } - return null; - } - - if (RecordFieldType.RECORD.equals(dataType)) { - RecordDataType recordDataType = (RecordDataType) rawDataType; - List recordFields = recordDataType.getChildSchema().getFields(); - if (recordFields != null) { - List hiveFields = recordFields.stream().map( - recordField -> ("`" + (hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName()) + "`:" - + getHiveTypeFromFieldType(recordField.getDataType(), hiveFieldNames))).collect(Collectors.toList()); - return "STRUCT<" + org.apache.commons.lang3.StringUtils.join(hiveFields, ", ") + ">"; - } - return null; - } - throw new IllegalArgumentException("Error converting Avro type " + dataType.name() + " to Hive type"); - } - - private synchronized WriteResult updateRecords(final RecordSchema inputRecordSchema, final OutputMetadataHolder outputMetadataHolder, - final RecordReader reader, final RecordSetWriter writer) throws IOException { - try { - writer.beginRecordSet(); - Record inputRecord; - while((inputRecord = reader.nextRecord()) != null) { - List inputRecordFields = inputRecordSchema.getFields(); - Map outputRecordFields = new HashMap<>(inputRecordFields.size()); - // Copy values from input field name to output field name - for(Map.Entry mapping : outputMetadataHolder.getFieldMap().entrySet()) { - outputRecordFields.put(mapping.getValue(), inputRecord.getValue(mapping.getKey())); - } - Record outputRecord = new MapRecord(outputMetadataHolder.getOutputSchema(), outputRecordFields); - writer.write(outputRecord); - } - return writer.finishRecordSet(); - - } catch (MalformedRecordException mre) { - throw new IOException("Error reading records: "+mre.getMessage(), mre); - } - } - - private static class OutputMetadataHolder { - private final RecordSchema outputSchema; - private final Map fieldMap; - - public OutputMetadataHolder(RecordSchema outputSchema, Map fieldMap) { - this.outputSchema = outputSchema; - this.fieldMap = fieldMap; - } - - public RecordSchema getOutputSchema() { - return outputSchema; - } - - public Map getFieldMap() { - return fieldMap; - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java deleted file mode 100644 index 70cc6c13c4..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -public class AuthenticationFailedException extends Exception { - public AuthenticationFailedException(String reason, Exception cause) { - super(reason, cause); - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java deleted file mode 100644 index 36889129f3..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -public class CsvOutputOptions { - - private boolean header = true; - private String altHeader = null; - private String delimiter = ","; - private boolean quote = false; - private boolean escape = true; - - private int maxRowsPerFlowFile = 0; - - public boolean isHeader() { - return header; - } - - public String getAltHeader() { - return altHeader; - } - - - public String getDelimiter() { - return delimiter; - } - - - public boolean isQuote() { - return quote; - } - - public boolean isEscape() { - return escape; - } - - public int getMaxRowsPerFlowFile() { - return maxRowsPerFlowFile; - } - - public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) { - this.header = header; - this.altHeader = altHeader; - this.delimiter = delimiter; - this.quote = quote; - this.escape = escape; - this.maxRowsPerFlowFile = maxRowsPerFlowFile; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java deleted file mode 100644 index b212207ebb..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.hadoop.KerberosProperties; -import org.apache.nifi.hadoop.SecurityUtil; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.security.krb.KerberosUser; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; - -public class HiveConfigurator { - - public Collection validate(String configFiles, String principal, String keyTab, String password, - AtomicReference validationResourceHolder, ComponentLog log) { - - final List problems = new ArrayList<>(); - ValidationResources resources = validationResourceHolder.get(); - - // if no resources in the holder, or if the holder has different resources loaded, - // then load the Configuration and set the new resources in the holder - if (resources == null || !configFiles.equals(resources.getConfigResources())) { - log.debug("Reloading validation resources"); - resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles)); - validationResourceHolder.set(resources); - } - - final Configuration hiveConfig = resources.getConfiguration(); - - problems.addAll(KerberosProperties.validatePrincipalWithKeytabOrPassword(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, password, log)); - - return problems; - } - - public HiveConf getConfigurationFromFiles(final String configFiles) { - final HiveConf hiveConfig = new HiveConf(); - if (StringUtils.isNotBlank(configFiles)) { - for (final String configFile : configFiles.split(",")) { - hiveConfig.addResource(new Path(configFile.trim())); - } - } - return hiveConfig; - } - - public void preload(Configuration configuration) { - try { - FileSystem.get(configuration).close(); - UserGroupInformation.setConfiguration(configuration); - } catch (IOException ioe) { - // Suppress exception as future uses of this configuration will fail - } - } - - /** - * Acquires a {@link UserGroupInformation} using the given {@link Configuration} and {@link KerberosUser}. - * @see SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser) - * @param hiveConfig The Configuration to apply to the acquired UserGroupInformation - * @param kerberosUser The KerberosUser to authenticate - * @return A UserGroupInformation instance created using the Subject of the given KerberosUser - * @throws AuthenticationFailedException if authentication fails - */ - public UserGroupInformation authenticate(final Configuration hiveConfig, KerberosUser kerberosUser) throws AuthenticationFailedException { - try { - return SecurityUtil.getUgiForKerberosUser(hiveConfig, kerberosUser); - } catch (IOException ioe) { - throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe); - } - } - - /** - * As of Apache NiFi 1.5.0, due to changes made to - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this - * class to authenticate a principal with Kerberos, Hive controller services no longer - * attempt relogins explicitly. For more information, please read the documentation for - * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. - *

- * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by - * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive - * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions - * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same - * {@link UserGroupInformation} instance. One of these threads could leave the - * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state - * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed - * authentication attempts that would leave the Hive controller service in an unrecoverable state. - * - * @see SecurityUtil#loginKerberos(Configuration, String, String) - * @deprecated Use {@link SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)} - */ - @Deprecated - public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException { - UserGroupInformation ugi; - try { - ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab); - } catch (IOException ioe) { - throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe); - } - return ugi; - } - - /** - * As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper - * method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a - * {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins. - * - * @see HiveConfigurator#authenticate(Configuration, String, String) - */ - @Deprecated - public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException { - return authenticate(hiveConfig, principal, keyTab); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java deleted file mode 100644 index fd1aaf4fad..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.SchemaBuilder.FieldAssembler; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumWriter; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringEscapeUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.nifi.components.PropertyDescriptor; - -import java.io.IOException; -import java.io.OutputStream; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static java.sql.Types.ARRAY; -import static java.sql.Types.BIGINT; -import static java.sql.Types.BINARY; -import static java.sql.Types.BIT; -import static java.sql.Types.BLOB; -import static java.sql.Types.BOOLEAN; -import static java.sql.Types.CHAR; -import static java.sql.Types.CLOB; -import static java.sql.Types.DATE; -import static java.sql.Types.DECIMAL; -import static java.sql.Types.DOUBLE; -import static java.sql.Types.FLOAT; -import static java.sql.Types.INTEGER; -import static java.sql.Types.JAVA_OBJECT; -import static java.sql.Types.LONGNVARCHAR; -import static java.sql.Types.LONGVARBINARY; -import static java.sql.Types.LONGVARCHAR; -import static java.sql.Types.NCHAR; -import static java.sql.Types.NUMERIC; -import static java.sql.Types.NVARCHAR; -import static java.sql.Types.OTHER; -import static java.sql.Types.REAL; -import static java.sql.Types.ROWID; -import static java.sql.Types.SMALLINT; -import static java.sql.Types.SQLXML; -import static java.sql.Types.STRUCT; -import static java.sql.Types.TIME; -import static java.sql.Types.TIMESTAMP; -import static java.sql.Types.TINYINT; -import static java.sql.Types.VARBINARY; -import static java.sql.Types.VARCHAR; - -/** - * JDBC / HiveQL common functions. - */ -public class HiveJdbcCommon { - - public static final String AVRO = "Avro"; - public static final String CSV = "CSV"; - - public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary"; - public static final String CSV_MIME_TYPE = "text/csv"; - - - public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder() - .name("hive-normalize-avro") - .displayName("Normalize Table/Column Names") - .description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods " - + "will be changed to underscores in order to build a valid Avro record.") - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException { - return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null); - } - - - public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback) - throws SQLException, IOException { - final Schema schema = createSchema(rs, recordName, convertNames); - final GenericRecord rec = new GenericData.Record(schema); - - final DatumWriter datumWriter = new GenericDatumWriter<>(schema); - try (final DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { - dataFileWriter.create(schema, outStream); - - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - long nrOfRows = 0; - while (rs.next()) { - if (callback != null) { - callback.processRow(rs); - } - for (int i = 1; i <= nrOfColumns; i++) { - final int javaSqlType = meta.getColumnType(i); - Object value = rs.getObject(i); - - if (value == null) { - rec.put(i - 1, null); - - } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) { - // bytes requires little bit different handling - ByteBuffer bb = null; - if (value instanceof byte[]) { - bb = ByteBuffer.wrap((byte[]) value); - } else if (value instanceof ByteBuffer) { - bb = (ByteBuffer) value; - } - if (bb != null) { - rec.put(i - 1, bb); - } else { - throw new IOException("Could not process binary object of type " + value.getClass().getName()); - } - - } else if (value instanceof Byte) { - // tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT - // But value is returned by JDBC as java.lang.Byte - // (at least H2 JDBC works this way) - // direct put to avro record results: - // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte - rec.put(i - 1, ((Byte) value).intValue()); - - } else if (value instanceof BigDecimal || value instanceof BigInteger) { - // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38" - rec.put(i - 1, value.toString()); - - } else if (value instanceof Number) { - // Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned - // when the JDBC type is 6 (Float) for example. - if (javaSqlType == FLOAT) { - value = rs.getFloat(i); - } else if (javaSqlType == DOUBLE) { - value = rs.getDouble(i); - } else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) { - value = rs.getInt(i); - } - - rec.put(i - 1, value); - - } else if (value instanceof Boolean) { - rec.put(i - 1, value); - } else if (value instanceof java.sql.SQLXML) { - rec.put(i - 1, ((java.sql.SQLXML) value).getString()); - } else { - // The different types that we support are numbers (int, long, double, float), - // as well as boolean values and Strings. Since Avro doesn't provide - // timestamp types, we want to convert those to Strings. So we will cast anything other - // than numbers or booleans to strings by using the toString() method. - rec.put(i - 1, value.toString()); - } - } - dataFileWriter.append(rec); - nrOfRows += 1; - - if (maxRows > 0 && nrOfRows == maxRows) - break; - } - - return nrOfRows; - } - } - - public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException { - return createSchema(rs, null, false); - } - - /** - * Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a - * fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value. - * - * @param rs The result set to convert to Avro - * @param recordName The a priori record name to use if it cannot be determined from the result set. - * @param convertNames Whether to convert column/table names to be legal Avro names - * @return A Schema object representing the result set converted to an Avro record - * @throws SQLException if any error occurs during conversion - */ - public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException { - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName; - try { - if (nrOfColumns > 0) { - // Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column - String firstColumnNameFromMeta = meta.getColumnName(1); - int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf("."); - if (tableNameDelimiter > -1) { - String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter); - if (!StringUtils.isBlank(tableNameFromMeta)) { - tableName = tableNameFromMeta; - } - } - } - } catch (SQLException se) { - // Not all drivers support getTableName, so just use the previously-set default - } - - if (convertNames) { - tableName = normalizeNameForAvro(tableName); - } - final FieldAssembler builder = SchemaBuilder.record(tableName).namespace("any.data").fields(); - - /** - * Some missing Avro types - Decimal, Date types. May need some additional work. - */ - for (int i = 1; i <= nrOfColumns; i++) { - String columnNameFromMeta = meta.getColumnName(i); - // Hive returns table.column for column name. Grab the column name as the string after the last period - int columnNameDelimiter = columnNameFromMeta.lastIndexOf("."); - String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1); - switch (meta.getColumnType(i)) { - case CHAR: - case LONGNVARCHAR: - case LONGVARCHAR: - case NCHAR: - case NVARCHAR: - case VARCHAR: - case ARRAY: - case STRUCT: - case JAVA_OBJECT: - case OTHER: - case SQLXML: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case BIT: - case BOOLEAN: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault(); - break; - - case INTEGER: - // Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned() - boolean signedType = true; - try { - signedType = meta.isSigned(i); - } catch (SQLException se) { - // Use signed types as default - } - if (signedType) { - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); - } else { - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); - } - break; - - case SMALLINT: - case TINYINT: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); - break; - - case BIGINT: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); - break; - - // java.sql.RowId is interface, is seems to be database - // implementation specific, let's convert to String - case ROWID: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case FLOAT: - case REAL: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault(); - break; - - case DOUBLE: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault(); - break; - - // Did not find direct suitable type, need to be clarified!!!! - case DECIMAL: - case NUMERIC: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - // Did not find direct suitable type, need to be clarified!!!! - case DATE: - case TIME: - case TIMESTAMP: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); - break; - - case BINARY: - case VARBINARY: - case LONGVARBINARY: - case BLOB: - case CLOB: - builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault(); - break; - - - default: - throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type"); - } - } - - return builder.endRecord(); - } - - public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException { - return convertToCsvStream(rs, outStream, null, null, outputOptions); - } - - public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions) - throws SQLException, IOException { - - final ResultSetMetaData meta = rs.getMetaData(); - final int nrOfColumns = meta.getColumnCount(); - List columnNames = new ArrayList<>(nrOfColumns); - - if (outputOptions.isHeader()) { - if (outputOptions.getAltHeader() == null) { - for (int i = 1; i <= nrOfColumns; i++) { - String columnNameFromMeta = meta.getColumnName(i); - // Hive returns table.column for column name. Grab the column name as the string after the last period - int columnNameDelimiter = columnNameFromMeta.lastIndexOf("."); - columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1)); - } - } else { - String[] altHeaderNames = outputOptions.getAltHeader().split(","); - columnNames = Arrays.asList(altHeaderNames); - } - } - - // Write column names as header row - outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8)); - if (outputOptions.isHeader()) { - outStream.write("\n".getBytes(StandardCharsets.UTF_8)); - } - - // Iterate over the rows - int maxRows = outputOptions.getMaxRowsPerFlowFile(); - long nrOfRows = 0; - while (rs.next()) { - if (callback != null) { - callback.processRow(rs); - } - List rowValues = new ArrayList<>(nrOfColumns); - for (int i = 1; i <= nrOfColumns; i++) { - final int javaSqlType = meta.getColumnType(i); - final Object value = rs.getObject(i); - - switch (javaSqlType) { - case CHAR: - case LONGNVARCHAR: - case LONGVARCHAR: - case NCHAR: - case NVARCHAR: - case VARCHAR: - String valueString = rs.getString(i); - if (valueString != null) { - // Removed extra quotes as those are a part of the escapeCsv when required. - StringBuilder sb = new StringBuilder(); - if (outputOptions.isQuote()) { - sb.append("\""); - if (outputOptions.isEscape()) { - sb.append(StringEscapeUtils.escapeCsv(valueString)); - } else { - sb.append(valueString); - } - sb.append("\""); - rowValues.add(sb.toString()); - } else { - if (outputOptions.isEscape()) { - rowValues.add(StringEscapeUtils.escapeCsv(valueString)); - } else { - rowValues.add(valueString); - } - } - } else { - rowValues.add(""); - } - break; - case ARRAY: - case STRUCT: - case JAVA_OBJECT: - String complexValueString = rs.getString(i); - if (complexValueString != null) { - rowValues.add(StringEscapeUtils.escapeCsv(complexValueString)); - } else { - rowValues.add(""); - } - break; - case SQLXML: - if (value != null) { - rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString())); - } else { - rowValues.add(""); - } - default: - if (value != null) { - rowValues.add(value.toString()); - } else { - rowValues.add(""); - } - } - } - // Write row values - outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8)); - outStream.write("\n".getBytes(StandardCharsets.UTF_8)); - nrOfRows++; - - if (maxRows > 0 && nrOfRows == maxRows) - break; - } - return nrOfRows; - } - - public static String normalizeNameForAvro(String inputName) { - String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_"); - if (Character.isDigit(normalizedName.charAt(0))) { - normalizedName = "_" + normalizedName; - } - return normalizedName; - } - - /** - * An interface for callback methods which allows processing of a row during the convertToXYZStream() processing. - * IMPORTANT: This method should only work on the row pointed at by the current ResultSet reference. - * Advancing the cursor (e.g.) can cause rows to be skipped during Avro transformation. - */ - public interface ResultSetRowCallback { - void processRow(ResultSet resultSet) throws IOException; - } - - public static Configuration getConfigurationFromFiles(final String configFiles) { - final Configuration hiveConfig = new HiveConf(); - if (StringUtils.isNotBlank(configFiles)) { - for (final String configFile : configFiles.split(",")) { - hiveConfig.addResource(new Path(configFile.trim())); - } - } - return hiveConfig; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java deleted file mode 100644 index 1014efb3c2..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.util.hive; - -import org.apache.hadoop.conf.Configuration; - -/** - * A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary) - */ -public class ValidationResources { - - private final String configResources; - private final Configuration configuration; - - public ValidationResources(String configResources, Configuration configuration) { - this.configResources = configResources; - this.configuration = configuration; - } - - public String getConfigResources() { - return configResources; - } - - public Configuration getConfiguration() { - return configuration; - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService deleted file mode 100644 index 63e4951e43..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.nifi.dbcp.hive.Hive_1_1ConnectionPool \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor deleted file mode 100644 index 5e4e944a8c..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.nifi.processors.hive.SelectHive_1_1QL -org.apache.nifi.processors.hive.PutHive_1_1QL -org.apache.nifi.processors.hive.UpdateHive_1_1Table diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java deleted file mode 100644 index 84604ecfc8..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.dbcp.hive; - -import org.apache.commons.dbcp2.BasicDataSource; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.kerberos.KerberosCredentialsService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.registry.VariableDescriptor; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.MockConfigurationContext; -import org.apache.nifi.util.MockControllerServiceLookup; -import org.apache.nifi.util.MockVariableRegistry; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.EnabledIfSystemProperty; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.UndeclaredThrowableException; -import java.security.PrivilegedExceptionAction; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.ArgumentMatchers.isA; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class Hive_1_1ConnectionPoolTest { - private UserGroupInformation userGroupInformation; - private Hive_1_1ConnectionPool hiveConnectionPool; - private BasicDataSource basicDataSource; - private ComponentLog componentLog; - private File krb5conf = new File("src/test/resources/krb5.conf"); - - @BeforeEach - public void setup() throws Exception { - // have to initialize this system property before anything else - System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath()); - System.setProperty("java.security.krb5.realm", "nifi.com"); - System.setProperty("java.security.krb5.kdc", "nifi.kdc"); - - userGroupInformation = mock(UserGroupInformation.class); - basicDataSource = mock(BasicDataSource.class); - componentLog = mock(ComponentLog.class); - - when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> { - try { - return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run(); - } catch (IOException | Error | RuntimeException | InterruptedException e) { - throw e; - } catch (Throwable e) { - throw new UndeclaredThrowableException(e); - } - }); - - initPool(); - } - - private void initPool() throws Exception { - hiveConnectionPool = new Hive_1_1ConnectionPool(); - - Field ugiField = Hive_1_1ConnectionPool.class.getDeclaredField("ugi"); - ugiField.setAccessible(true); - ugiField.set(hiveConnectionPool, userGroupInformation); - - Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource"); - dataSourceField.setAccessible(true); - dataSourceField.set(hiveConnectionPool, basicDataSource); - - Field componentLogField = AbstractControllerService.class.getDeclaredField("logger"); - componentLogField.setAccessible(true); - componentLogField.set(hiveConnectionPool, componentLog); - } - - @Test - public void testGetConnectionSqlException() throws SQLException { - SQLException sqlException = new SQLException("bad sql"); - when(basicDataSource.getConnection()).thenThrow(sqlException); - ProcessException e = assertThrows(ProcessException.class, () -> hiveConnectionPool.getConnection()); - assertEquals(sqlException, e.getCause()); - } - - @Test - public void testExpressionLanguageSupport() throws Exception { - final String URL = "jdbc:hive2://localhost:10000/default"; - final String USER = "user"; - final String PASS = "pass"; - final int MAX_CONN = 7; - final String MAX_CONN_LIFETIME = "1 sec"; - final String MAX_WAIT = "10 sec"; // 10000 milliseconds - final String CONF = "/path/to/hive-site.xml"; - hiveConnectionPool = new Hive_1_1ConnectionPool(); - - Map props = new HashMap() {{ - put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}"); - put(Hive_1_1ConnectionPool.DB_USER, "${username}"); - put(Hive_1_1ConnectionPool.DB_PASSWORD, "${password}"); - put(Hive_1_1ConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}"); - put(Hive_1_1ConnectionPool.MAX_CONN_LIFETIME, "${maxconnlifetime}"); - put(Hive_1_1ConnectionPool.MAX_WAIT_TIME, "${maxwait}"); - put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}"); - }}; - - MockVariableRegistry registry = new MockVariableRegistry(); - registry.setVariable(new VariableDescriptor("url"), URL); - registry.setVariable(new VariableDescriptor("username"), USER); - registry.setVariable(new VariableDescriptor("password"), PASS); - registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN)); - registry.setVariable(new VariableDescriptor("maxconnlifetime"), MAX_CONN_LIFETIME); - registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT); - registry.setVariable(new VariableDescriptor("hiveconf"), CONF); - - - MockConfigurationContext context = new MockConfigurationContext(props, null, registry); - hiveConnectionPool.onConfigured(context); - - Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource"); - dataSourceField.setAccessible(true); - basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool); - assertEquals(URL, basicDataSource.getUrl()); - assertEquals(USER, basicDataSource.getUsername()); - assertEquals(PASS, basicDataSource.getPassword()); - assertEquals(MAX_CONN, basicDataSource.getMaxTotal()); - assertEquals(1000L, basicDataSource.getMaxConnLifetimeMillis()); - assertEquals(10000L, basicDataSource.getMaxWaitMillis()); - assertEquals(URL, hiveConnectionPool.getConnectionURL()); - } - - @EnabledIfSystemProperty( - named = "nifi.test.unstable", - matches = "true", - disabledReason = "Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run") - @Test - public void testKerberosAuthException() { - final String URL = "jdbc:hive2://localhost:10000/default"; - final String conf = "src/test/resources/hive-site-security.xml"; - final String ktab = "src/test/resources/fake.keytab"; - final String kprinc = "bad@PRINCIPAL.COM"; - final String kerberosCredentialsServiceId = UUID.randomUUID().toString(); - - Map props = new HashMap() {{ - put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}"); - put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}"); - put(Hive_1_1ConnectionPool.KERBEROS_CREDENTIALS_SERVICE, kerberosCredentialsServiceId); - }}; - - MockVariableRegistry registry = new MockVariableRegistry(); - registry.setVariable(new VariableDescriptor("url"), URL); - registry.setVariable(new VariableDescriptor("conf"), conf); - - MockControllerServiceLookup mockControllerServiceLookup = new MockControllerServiceLookup() {}; - KerberosCredentialsService kerberosCredentialsService = mock(KerberosCredentialsService.class); - when(kerberosCredentialsService.getKeytab()).thenReturn(ktab); - when(kerberosCredentialsService.getPrincipal()).thenReturn(kprinc); - mockControllerServiceLookup.addControllerService(kerberosCredentialsService, kerberosCredentialsServiceId); - - MockConfigurationContext context = new MockConfigurationContext(props, mockControllerServiceLookup, registry); - assertThrows(InitializationException.class, () -> hiveConnectionPool.onConfigured(context)); - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java deleted file mode 100644 index c191975c2e..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSessionFactory; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.util.MockProcessContext; -import org.apache.nifi.util.MockProcessorInitializationContext; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestHiveParser extends AbstractHive_1_1QLProcessor { - - @BeforeEach - public void initialize() { - final MockProcessContext processContext = new MockProcessContext(this); - final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext); - initialize(initializationContext); - } - - @Override - public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { - - } - - @Test - public void parseSelect() { - String query = "select a.empid, to_something(b.saraly) from " + - "company.emp a inner join default.salary b where a.empid = b.empid"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(2, tableNames.size()); - assertTrue(tableNames.contains(new TableName("company", "emp", true))); - assertTrue(tableNames.contains(new TableName("default", "salary", true))); - } - - @Test - public void parseSelectPrepared() { - String query = "select empid from company.emp a where a.firstName = ?"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName("company", "emp", true))); - } - - - @Test - public void parseLongSelect() { - String query = "select\n" + - "\n" + - " i_item_id,\n" + - "\n" + - " i_item_desc,\n" + - "\n" + - " s_state,\n" + - "\n" + - " count(ss_quantity) as store_sales_quantitycount,\n" + - "\n" + - " avg(ss_quantity) as store_sales_quantityave,\n" + - "\n" + - " stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" + - "\n" + - " stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" + - "\n" + - " count(sr_return_quantity) as store_returns_quantitycount,\n" + - "\n" + - " avg(sr_return_quantity) as store_returns_quantityave,\n" + - "\n" + - " stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" + - "\n" + - " stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" + - "\n" + - " count(cs_quantity) as catalog_sales_quantitycount,\n" + - "\n" + - " avg(cs_quantity) as catalog_sales_quantityave,\n" + - "\n" + - " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" + - "\n" + - " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" + - "\n" + - "from\n" + - "\n" + - " store_sales,\n" + - "\n" + - " store_returns,\n" + - "\n" + - " catalog_sales,\n" + - "\n" + - " date_dim d1,\n" + - "\n" + - " date_dim d2,\n" + - "\n" + - " date_dim d3,\n" + - "\n" + - " store,\n" + - "\n" + - " item\n" + - "\n" + - "where\n" + - "\n" + - " d1.d_quarter_name = '2000Q1'\n" + - "\n" + - " and d1.d_date_sk = ss_sold_date_sk\n" + - "\n" + - " and i_item_sk = ss_item_sk\n" + - "\n" + - " and s_store_sk = ss_store_sk\n" + - "\n" + - " and ss_customer_sk = sr_customer_sk\n" + - "\n" + - " and ss_item_sk = sr_item_sk\n" + - "\n" + - " and ss_ticket_number = sr_ticket_number\n" + - "\n" + - " and sr_returned_date_sk = d2.d_date_sk\n" + - "\n" + - " and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + - "\n" + - " and sr_customer_sk = cs_bill_customer_sk\n" + - "\n" + - " and sr_item_sk = cs_item_sk\n" + - "\n" + - " and cs_sold_date_sk = d3.d_date_sk\n" + - "\n" + - " and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + - "\n" + - "group by i_item_id , i_item_desc , s_state\n" + - "\n" + - "order by i_item_id , i_item_desc , s_state\n" + - "\n" + - "limit 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(6, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - for (TableName tableName : tableNames) { - if (tableName.equals(new TableName(null, "store_sales", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "store_returns", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "catalog_sales", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "date_dim", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "store", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "item", true))) { - cnt.incrementAndGet(); - } - } - assertEquals(6, cnt.get()); - } - - @Test - public void parseSelectInsert() { - String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'"; - - // The same database.tableName can appear two times for input and output. - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(2, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - tableNames.forEach(tableName -> { - if (tableName.equals(new TableName("databaseA", "tableA", false))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName("databaseA", "tableA", true))) { - cnt.incrementAndGet(); - } - }); - assertEquals(2, cnt.get()); - } - - @Test - public void parseInsert() { - String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(3, tableNames.size()); - AtomicInteger cnt = new AtomicInteger(0); - tableNames.forEach(tableName -> { - if (tableName.equals(new TableName("databaseB", "tableB1", false))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "tableA1", true))) { - cnt.incrementAndGet(); - } else if (tableName.equals(new TableName(null, "tableA2", true))) { - cnt.incrementAndGet(); - } - }); - assertEquals(3, cnt.get()); - } - - @Test - public void parseUpdate() { - String query = "update table_a set y = 'updated' where x > 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseDelete() { - String query = "delete from table_a where x > 100"; - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseDDL() { - String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" + - "EmployeeID INT,FirstName STRING, Title STRING,\n" + - "State STRING, Laptop STRING)\n" + - "COMMENT 'Employee Names'\n" + - "STORED AS ORC"; - - - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false))); - } - - @Test - public void parseSetProperty() { - String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseSetRole() { - String query = "set role all"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseShowRoles() { - String query = "show roles"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - - @Test - public void parseMsck() { - String query = "msck repair table table_a"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(1, tableNames.size()); - assertTrue(tableNames.contains(new TableName(null, "table_a", false))); - } - - @Test - public void parseAddJar() { - String query = "ADD JAR hdfs:///tmp/my_jar.jar"; - final Set tableNames = findTableNames(query); - System.out.printf("tableNames=%s\n", tableNames); - assertEquals(0, tableNames.size()); - } - -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java deleted file mode 100644 index 4a6c944b90..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java +++ /dev/null @@ -1,820 +0,0 @@ -package org.apache.nifi.processors.hive;/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.pattern.RollbackOnFailure; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.Mockito; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.HashMap; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -@DisabledOnOs(OS.WINDOWS) -public class TestPutHive_1_1QL { - private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)"; - private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))"; - - @BeforeAll - public static void setup() { - System.setProperty("derby.stream.error.file", "target/derby.log"); - } - - @Test - public void testDirectStatements(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes()); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - - runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes()); - runner.run(); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("George", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testFailInMiddleWithBadStatementRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes()); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - runner.run(); - - // The 1st one should be routed to success, others should stay in queue. - assertEquals(3, runner.getQueueSize().getObjectCount()); - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0); - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 1); - } - - @Test - public void testFailAtBeginning(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - runner.run(); - - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 2); - } - - @Test - public void testFailAtBeginningRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); - runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(3, runner.getQueueSize().getObjectCount()); - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0); - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 0); - } - - @Test - public void testFailInMiddleWithBadParameterType(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); - badAttributes.put("hiveql.args.1.value", "hello"); - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); - } - - - @Test - public void testFailInMiddleWithBadParameterValue(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100 - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertTrue(rs.next()); - assertTrue(rs.next()); - assertFalse(rs.next()); - } - } - } - - @Test - public void testFailInMiddleWithBadNumberFormat(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersonsAutoId); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final Map goodAttributes = new HashMap<>(); - goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - goodAttributes.put("hiveql.args.1.value", "84"); - - final Map badAttributes = new HashMap<>(); - badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - badAttributes.put("hiveql.args.1.value", "NOT_NUMBER"); - - final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, badAttributes); - runner.enqueue(data, goodAttributes); - runner.enqueue(data, goodAttributes); - runner.run(); - - runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); - runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertTrue(rs.next()); - assertTrue(rs.next()); - assertFalse(rs.next()); - } - } - } - - - @Test - public void testUsingSqlDataTypesWithNegativeValues(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)"); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", "-5"); - attributes.put("hiveql.args.1.value", "84"); - runner.enqueue("INSERT INTO PERSONS VALUES (1, 'Mark', ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS"); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testStatementsWithPreparedParameters(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - - runner.clearTransferState(); - - attributes.clear(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.1.value", "George"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.2.value", "1"); - - runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes); - runner.run(); - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("George", rs.getString(2)); - assertEquals(84, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - - @Test - public void testMultipleStatementsWithinFlowFile(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the semicolon - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS) - .forEach(f -> f.assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS")); - - // Now we can check that the values were inserted by the multi-statement script. - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1), "Record ID mismatch"); - assertEquals( "George", rs.getString(2), "Record NAME mismatch"); - } - } - } - - @Test - public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the semicolon - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - - // Now we can check that the values were inserted by the multi-statement script. - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1), "Record ID mismatch"); - assertEquals( "George\\;", rs.getString(2), "Record NAME mismatch"); - } - } - } - - - @Test - public void testWithNullParameter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - - runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); - runner.run(); - - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - assertEquals(1, rs.getInt(1)); - assertEquals("Mark", rs.getString(2)); - assertEquals(0, rs.getInt(3)); - assertFalse(rs.next()); - } - } - } - - @Test - public void testInvalidStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - stmt.executeUpdate(createPersons); - } - } - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; "; - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because of the table is invalid - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_FAILURE, 1); - - try (final Connection conn = service.getConnection()) { - try (final Statement stmt = conn.createStatement()) { - final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); - assertTrue(rs.next()); - } - } - } - - - @Test - public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final DBCPService service = new SQLExceptionService(null); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because there isn't a valid connection and tables don't exist. - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1); - } - - @Test - public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final DBCPService service = new SQLExceptionService(null); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(1, runner.getQueueSize().getObjectCount()); - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0); - } - - @Test - public void testUnknownFailure() throws InitializationException, ProcessException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final SQLExceptionService service = new SQLExceptionService(null); - service.setErrorCode(2); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - runner.run(); - - // should fail because there isn't a valid connection and tables don't exist. - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1); - } - - @Test - public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException { - final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); - final SQLExceptionService service = new SQLExceptionService(null); - service.setErrorCode(0); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - - runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); - - final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + - "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; - - final Map attributes = new HashMap<>(); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.1.value", "1"); - - attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); - attributes.put("hiveql.args.2.value", "Mark"); - - attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.3.value", "84"); - - attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); - attributes.put("hiveql.args.4.value", "1"); - - runner.enqueue(sql.getBytes(), attributes); - - AssertionError e = assertThrows(AssertionError.class, () -> runner.run()); - assertTrue(e.getCause() instanceof ProcessException); - - assertEquals(1, runner.getQueueSize().getObjectCount()); - runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0); - } - - /** - * Simple implementation only for testing purposes - */ - private static class MockDBCPService extends AbstractControllerService implements Hive_1_1DBCPService { - private final String dbLocation; - - MockDBCPService(final String dbLocation) { - this.dbLocation = dbLocation; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); - return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true"); - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:derby:" + dbLocation + ";create=true"; - } - } - - /** - * Simple implementation only for testing purposes - */ - private static class SQLExceptionService extends AbstractControllerService implements Hive_1_1DBCPService { - private final Hive_1_1DBCPService service; - private int allowedBeforeFailure = 0; - private int successful = 0; - private int errorCode = 30000; // Default to a retryable exception code - - SQLExceptionService(final Hive_1_1DBCPService service) { - this.service = service; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - if (++successful > allowedBeforeFailure) { - final Connection conn = Mockito.mock(Connection.class); - Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode)); - return conn; - } else { - return service.getConnection(); - } - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return service != null ? service.getConnectionURL() : null; - } - - void setErrorCode(int errorCode) { - this.errorCode = errorCode; - } - } -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java deleted file mode 100644 index 46e77b9712..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java +++ /dev/null @@ -1,661 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.avro.file.DataFileStream; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumReader; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.provenance.ProvenanceEventRecord; -import org.apache.nifi.provenance.ProvenanceEventType; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.apache.nifi.util.hive.HiveJdbcCommon; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import static org.apache.nifi.processors.hive.SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT; -import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; -import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; -import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class TestSelectHive_1_1QL { - - private static final Logger LOGGER; - private final static String MAX_ROWS_KEY = "maxRows"; - private final int NUM_OF_ROWS = 100; - - - static { - System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info"); - System.setProperty("org.slf4j.simpleLogger.showDateTime", "true"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHive_1_1QL", "debug"); - System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHive_1_1QL", "debug"); - LOGGER = LoggerFactory.getLogger(TestSelectHive_1_1QL.class); - } - - private final static String DB_LOCATION = "target/db"; - - private final static String QUERY_WITH_EL = "select " - + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" - + " from persons PER" - + " where PER.ID > ${person.id}"; - - private final static String QUERY_WITHOUT_EL = "select " - + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" - + " from persons PER" - + " where PER.ID > 10"; - - - @BeforeAll - public static void setupClass() { - System.setProperty("derby.stream.error.file", "target/derby.log"); - } - - private TestRunner runner; - - @BeforeEach - public void setup() throws InitializationException { - final DBCPService dbcp = new DBCPServiceSimpleImpl(); - final Map dbcpProperties = new HashMap<>(); - - runner = TestRunners.newTestRunner(SelectHive_1_1QL.class); - runner.addControllerService("dbcp", dbcp, dbcpProperties); - runner.enableControllerService(dbcp); - runner.setProperty(SelectHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); - } - - @Test - public void testIncomingConnectionWithNoFlowFile() { - runner.setIncomingConnection(true); - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons"); - runner.run(); - runner.assertTransferCount(SelectHive_1_1QL.REL_SUCCESS, 0); - runner.assertTransferCount(SelectHive_1_1QL.REL_FAILURE, 0); - } - - @Test - public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException { - runner.setIncomingConnection(false); - invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro"); - - final List provenanceEvents = runner.getProvenanceEvents(); - final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); - assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType()); - assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri()); - } - - @Test - public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITH_EL, true, "Avro"); - - final List provenanceEvents = runner.getProvenanceEvents(); - assertEquals(4, provenanceEvents.size()); - - final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); - assertEquals(ProvenanceEventType.FORK, provenance0.getEventType()); - - final ProvenanceEventRecord provenance1 = provenanceEvents.get(1); - assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType()); - assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri()); - - final ProvenanceEventRecord provenance2 = provenanceEvents.get(2); - assertEquals(ProvenanceEventType.FORK, provenance2.getEventType()); - - final ProvenanceEventRecord provenance3 = provenanceEvents.get(3); - assertEquals(ProvenanceEventType.DROP, provenance3.getEventType()); - } - - - @Test - public void testWithNullIntColumn() throws SQLException { - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_NULL_INT"); - } catch (final SQLException sqle) { - // Nothing to do, probably means the table didn't exist - } - - stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))"); - - stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)"); - stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)"); - - runner.setIncomingConnection(false); - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT"); - runner.run(); - - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); - runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHive_1_1QL.RESULT_ROW_COUNT, "2"); - } - - @Test - public void testWithSqlException() throws SQLException { - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_NO_ROWS"); - } catch (final SQLException sqle) { - // Nothing to do, probably means the table didn't exist - } - - stmt.execute("create table TEST_NO_ROWS (id integer)"); - - runner.setIncomingConnection(false); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS"); - runner.run(); - - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPreQueriesNoIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, false, CSV, - "select 'no exception' from persons; select exception from persons", - null); - - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPreQueriesWithIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, true, CSV, - "select 'no exception' from persons; select exception from persons", - null); - - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPostQueriesNoIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, false, CSV, - null, - "select 'no exception' from persons; select exception from persons"); - - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - } - - @Test - public void invokeOnTriggerExceptionInPostQueriesWithIncomingFlows() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - doOnTrigger(QUERY_WITHOUT_EL, true, CSV, - null, - "select 'no exception' from persons; select exception from persons"); - - // with incoming connections, it should be rolled back - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - } - - @Test - public void testWithBadSQL() throws SQLException { - final String BAD_SQL = "create table TEST_NO_ROWS (id integer)"; - - // Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent) - runner.setIncomingConnection(true); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.enqueue(BAD_SQL); - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0); - flowFile.assertContentEquals(BAD_SQL); - flowFile.assertAttributeEquals("parentIds", null); - runner.clearTransferState(); - - // Test with no incoming flow file (an empty flow file is transferred) - runner.setIncomingConnection(false); - // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, BAD_SQL); - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); - flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0); - flowFile.assertContentEquals(""); - } - - @Test - public void invokeOnTriggerWithCsv() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV); - } - - @Test - public void invokeOnTriggerWithAvro() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO); - } - - @Test - public void invokeOnTriggerWithValidPreQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - "select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format. - null); - } - - @Test - public void invokeOnTriggerWithValidPostQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - null, - //should not be 'select'. But Derby driver doesn't support "set param=val" format, - //so just providing any "compilable" query. - " select '4' from persons; \nselect '5' from persons"); - } - - @Test - public void invokeOnTriggerWithValidPrePostQieries() - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, - //should not be 'select'. But Derby driver doesn't support "set param=val" format, - //so just providing any "compilable" query. - "select '1' from persons; select '2' from persons", - " select '4' from persons; \nselect '5' from persons"); - } - - - public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null); - } - - public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, - String preQueries, String postQueries) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); - - final List flowfiles = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS); - MockFlowFile flowFile = flowfiles.get(0); - final InputStream in = new ByteArrayInputStream(flowFile.toByteArray()); - long recordsFromStream = 0; - if (AVRO.equals(outputFormat)) { - assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); - final DatumReader datumReader = new GenericDatumReader<>(); - try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { - GenericRecord record = null; - while (dataFileReader.hasNext()) { - // Reuse record object by passing it to next(). This saves us from - // allocating and garbage collecting many objects for files with - // many items. - record = dataFileReader.next(record); - recordsFromStream++; - } - } - } else { - assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - - String headerRow = br.readLine(); - // Derby capitalizes column names - assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow); - - // Validate rows - String line; - while ((line = br.readLine()) != null) { - recordsFromStream++; - String[] values = line.split(","); - if (recordsFromStream < (NUM_OF_ROWS - 10)) { - assertEquals(3, values.length); - assertTrue(values[1].startsWith("\"")); - assertTrue(values[1].endsWith("\"")); - } else { - assertEquals(2, values.length); // Middle value is null - } - } - } - assertEquals(NUM_OF_ROWS - 10, recordsFromStream); - assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHive_1_1QL.RESULT_ROW_COUNT))); - flowFile.assertAttributeEquals(AbstractHive_1_1QLProcessor.ATTR_INPUT_TABLES, "persons"); - } - - public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, - String preQueries, String postQueries) - throws InitializationException, ClassNotFoundException, SQLException, IOException { - - // remove previous test database, if any - final File dbLocation = new File(DB_LOCATION); - dbLocation.delete(); - - // load test data to database - final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); - final Statement stmt = con.createStatement(); - try { - stmt.execute("drop table persons"); - } catch (final SQLException sqle) { - // Nothing to do here, the table didn't exist - } - - stmt.execute("create table persons (id integer, name varchar(100), code integer)"); - Random rng = new Random(53496); - stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")"); - for (int i = 2; i < NUM_OF_ROWS; i++) { - stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")"); - } - stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)"); - - LOGGER.info("test data loaded"); - - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, query); - runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat); - if (preQueries != null) { - runner.setProperty(SelectHive_1_1QL.HIVEQL_PRE_QUERY, preQueries); - } - if (postQueries != null) { - runner.setProperty(SelectHive_1_1QL.HIVEQL_POST_QUERY, postQueries); - } - - if (incomingFlowFile) { - // incoming FlowFile content is not used, but attributes are used - final Map attributes = new HashMap<>(); - attributes.put("person.id", "10"); - runner.enqueue("Hello".getBytes(), attributes); - } - - runner.setIncomingConnection(incomingFlowFile); - runner.run(); - - return runner; - } - - @Test - public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(false); - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE"); - runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); - runner.setVariable(MAX_ROWS_KEY, "9"); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12); - - //ensure all but the last file have 9 records each - for (int ff = 0; ff < 11; ff++) { - mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff); - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(9, getNumberOfRecordsFromStream(in)); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - } - - //last file should have 1 record - mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11); - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(1, getNumberOfRecordsFromStream(in)); - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - runner.clearTransferState(); - } - - @Test - public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException { - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(true); - runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); - runner.setVariable(MAX_ROWS_KEY, "9"); - - Map attributes = new HashMap(); - attributes.put("hiveql.args.1.value", "1"); - attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes ); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); - MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0); - // Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s) - flowFile.assertAttributeEquals("hiveql.args.1.value", "1"); - flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER)); - runner.clearTransferState(); - } - - @Test - public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(true); - runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); - runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV); - - runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap() {{ - put(MAX_ROWS_KEY, "9"); - }}); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12); - - //ensure all but the last file have 9 records (10 lines = 9 records + header) each - for (int ff = 0; ff < 11; ff++) { - mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff); - in = new ByteArrayInputStream(mff.toByteArray()); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - assertEquals(10, br.lines().count()); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - } - - //last file should have 1 record (2 lines = 1 record + header) - mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11); - in = new ByteArrayInputStream(mff.toByteArray()); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - assertEquals(2, br.lines().count()); - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); - assertEquals("12", mff.getAttribute("fragment.count")); - runner.clearTransferState(); - } - - @Test - public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException { - - // load test data to database - final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); - Statement stmt = con.createStatement(); - InputStream in; - MockFlowFile mff; - - try { - stmt.execute("drop table TEST_QUERY_DB_TABLE"); - } catch (final SQLException sqle) { - // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] - } - - stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); - int rowCount = 0; - //create larger row set - for (int batch = 0; batch < 100; batch++) { - stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); - rowCount++; - } - - runner.setIncomingConnection(false); - runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE"); - runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "9"); - Integer maxFragments = 3; - runner.setProperty(SelectHive_1_1QL.MAX_FRAGMENTS, maxFragments.toString()); - - runner.run(); - runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, maxFragments); - - for (int i = 0; i < maxFragments; i++) { - mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(i); - in = new ByteArrayInputStream(mff.toByteArray()); - assertEquals(9, getNumberOfRecordsFromStream(in)); - - mff.assertAttributeExists("fragment.identifier"); - assertEquals(Integer.toString(i), mff.getAttribute("fragment.index")); - assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count")); - } - - runner.clearTransferState(); - } - - private long getNumberOfRecordsFromStream(InputStream in) throws IOException { - final DatumReader datumReader = new GenericDatumReader<>(); - try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { - GenericRecord record = null; - long recordsFromStream = 0; - while (dataFileReader.hasNext()) { - // Reuse record object by passing it to next(). This saves us from - // allocating and garbage collecting many objects for files with - // many items. - record = dataFileReader.next(record); - recordsFromStream += 1; - } - - return recordsFromStream; - } - } - - /** - * Simple implementation only for SelectHive_1_1QL processor testing. - */ - private class DBCPServiceSimpleImpl extends AbstractControllerService implements Hive_1_1DBCPService { - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); - return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true"); - } catch (final Exception e) { - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:derby:" + DB_LOCATION + ";create=true"; - } - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHive_1_1Table.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHive_1_1Table.java deleted file mode 100644 index b7ea3ec046..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestUpdateHive_1_1Table.java +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.hive; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.dbcp.DBCPService; -import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.schema.access.SchemaNotFoundException; -import org.apache.nifi.serialization.RecordReader; -import org.apache.nifi.serialization.SimpleRecordSchema; -import org.apache.nifi.serialization.record.MockRecordParser; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordFieldType; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.stubbing.Answer; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.BiFunction; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -@DisabledOnOs(OS.WINDOWS) -public class TestUpdateHive_1_1Table { - - private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml"; - private static final String TARGET_HIVE = "target/hive"; - - private static final String[] SHOW_TABLES_COLUMN_NAMES = new String[]{"tab_name"}; - private static final String[][] SHOW_TABLES_RESULTSET = new String[][]{ - new String[]{"messages"}, - new String[]{"users"}, - }; - - private static final String[] DESC_MESSAGES_TABLE_COLUMN_NAMES = new String[]{"id", "msg"}; - private static final String[][] DESC_MESSAGES_TABLE_RESULTSET = new String[][]{ - new String[]{"# col_name", "data_type", "comment"}, - new String[]{"id", "int", ""}, - new String[]{"msg", "string", ""}, - new String[]{"", null, null}, - new String[]{"# Partition Information", null, null}, - new String[]{"# col_name", "data_type", "comment"}, - new String[]{"continent", "string", ""}, - new String[]{"country", "string", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages", null} - }; - - private static final String[] DESC_USERS_TABLE_COLUMN_NAMES = new String[]{"name", "favorite_number", "favorite_color", "scale"}; - private static final String[][] DESC_USERS_TABLE_RESULTSET = new String[][]{ - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users", null} - }; - private static final String[][] DESC_EXTERNAL_USERS_TABLE_RESULTSET = new String[][]{ - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/path/to/users", null} - }; - - private static final String[] DESC_NEW_TABLE_COLUMN_NAMES = DESC_USERS_TABLE_COLUMN_NAMES; - private static final String[][] DESC_NEW_TABLE_RESULTSET = new String[][]{ - new String[]{"", null, null}, - new String[]{"name", "string", ""}, - new String[]{"favorite_number", "int", ""}, - new String[]{"favorite_color", "string", ""}, - new String[]{"scale", "double", ""}, - new String[]{"", null, null}, - new String[]{"# Detailed Table Information", null, null}, - new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable", null} - }; - - private TestRunner runner; - private UpdateHive_1_1Table processor; - - @BeforeEach - public void setUp() { - - Configuration testConf = new Configuration(); - testConf.addResource(new Path(TEST_CONF_PATH)); - - // Delete any temp files from previous tests - try { - FileUtils.deleteDirectory(new File(TARGET_HIVE)); - } catch (IOException ioe) { - // Do nothing, directory may not have existed - } - - processor = new UpdateHive_1_1Table(); - } - - private void configure(final UpdateHive_1_1Table processor, final int numUsers) throws InitializationException { - configure(processor, numUsers, false, -1); - } - - private void configure(final UpdateHive_1_1Table processor, final int numUsers, boolean failOnCreateReader, int failAfter) throws InitializationException { - configure(processor, numUsers, failOnCreateReader, failAfter, null); - } - - private void configure(final UpdateHive_1_1Table processor, final int numUsers, final boolean failOnCreateReader, final int failAfter, - final BiFunction recordGenerator) throws InitializationException { - runner = TestRunners.newTestRunner(processor); - MockRecordParser readerFactory = new MockRecordParser() { - @Override - public RecordReader createRecordReader(Map variables, InputStream in, long inputLength, ComponentLog logger) throws IOException, SchemaNotFoundException { - if (failOnCreateReader) { - throw new SchemaNotFoundException("test"); - } - return super.createRecordReader(variables, in, inputLength, logger); - } - }; - List fields = Arrays.asList( - new RecordField("name", RecordFieldType.STRING.getDataType()), - new RecordField("favorite_number", RecordFieldType.INT.getDataType()), - new RecordField("favorite_color", RecordFieldType.STRING.getDataType()), - new RecordField("scale", RecordFieldType.DOUBLE.getDataType()) - ); - final SimpleRecordSchema recordSchema = new SimpleRecordSchema(fields); - for (final RecordField recordField : recordSchema.getFields()) { - readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable()); - } - - if (recordGenerator == null) { - for (int i = 0; i < numUsers; i++) { - readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0); - } - } else { - recordGenerator.apply(numUsers, readerFactory); - } - - readerFactory.failAfter(failAfter); - - runner.addControllerService("mock-reader-factory", readerFactory); - runner.enableControllerService(readerFactory); - - runner.setProperty(UpdateHive_1_1Table.RECORD_READER, "mock-reader-factory"); - } - - @Test - public void testSetup(@TempDir java.nio.file.Path tempDir) throws Exception { - configure(processor, 0); - runner.assertNotValid(); - final File dbDir = tempDir.resolve("db").toFile(); - final DBCPService service = new MockHiveConnectionPool(dbDir.getAbsolutePath()); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - runner.assertNotValid(); - runner.assertNotValid(); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "users"); - runner.assertValid(); - runner.run(); - } - - @Test - public void testNoStatementsExecuted() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "users"); - final MockHiveConnectionPool service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "continent, country"); - HashMap attrs = new HashMap<>(); - attrs.put("continent", "Asia"); - attrs.put("country", "China"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "users"); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users"); - assertTrue(service.getExecutedStatements().isEmpty()); - } - - @Test - public void testCreateManagedTable() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "_newTable"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "_newTable"); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET", - statements.get(0)); - } - - @Test - public void testCreateManagedTableWithPartition() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "age int"); - runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "_newTable"); - attrs.put("age", "23"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "_newTable"); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) PARTITIONED BY (`age` int) STORED AS PARQUET", - statements.get(0)); - } - - @Test - public void testCreateExternalTable() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}"); - runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS); - runner.setProperty(UpdateHive_1_1Table.TABLE_MANAGEMENT_STRATEGY, UpdateHive_1_1Table.EXTERNAL_TABLE); - runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET); - final MockHiveConnectionPool service = new MockHiveConnectionPool("ext_users"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - runner.assertNotValid(); // Needs location specified - runner.setProperty(UpdateHive_1_1Table.EXTERNAL_TABLE_LOCATION, "/path/to/users"); - runner.assertValid(); - Map attrs = new HashMap<>(); - attrs.put("db.name", "default"); - attrs.put("table.name", "ext_users"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "ext_users"); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/path/to/users"); - List statements = service.getExecutedStatements(); - assertEquals(1, statements.size()); - assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS `ext_users` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET " - + "LOCATION '/path/to/users'", - statements.get(0)); - } - - @Test - public void testAddColumnsAndPartition() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "messages"); - final MockHiveConnectionPool service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "continent, country"); - HashMap attrs = new HashMap<>(); - attrs.put("continent", "Asia"); - attrs.put("country", "China"); - runner.enqueue(new byte[0], attrs); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1); - final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "messages"); - flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages/continent=Asia/country=China"); - List statements = service.getExecutedStatements(); - assertEquals(2, statements.size()); - // All columns from users table/data should be added to the table, and a new partition should be added - assertEquals("ALTER TABLE `messages` ADD COLUMNS (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE)", - statements.get(0)); - assertEquals("ALTER TABLE `messages` ADD IF NOT EXISTS PARTITION (`continent`='Asia', `country`='China')", - statements.get(1)); - } - - @Test - public void testMissingPartitionValues() throws Exception { - configure(processor, 1); - runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "messages"); - final DBCPService service = new MockHiveConnectionPool("test"); - runner.addControllerService("dbcp", service); - runner.enableControllerService(service); - runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp"); - runner.enqueue(new byte[0]); - runner.run(); - - runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 0); - runner.assertTransferCount(UpdateHive_1_1Table.REL_FAILURE, 1); - } - - /** - * Simple implementation only for testing purposes - */ - private static class MockHiveConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService { - private final String dbLocation; - - private final List executedStatements = new ArrayList<>(); - - MockHiveConnectionPool(final String dbLocation) { - this.dbLocation = dbLocation; - } - - @Override - public String getIdentifier() { - return "dbcp"; - } - - @Override - public Connection getConnection() throws ProcessException { - try { - Connection conn = mock(Connection.class); - Statement s = mock(Statement.class); - when(conn.createStatement()).thenReturn(s); - when(s.executeQuery(anyString())).thenAnswer((Answer) invocation -> { - final String query = (String) invocation.getArguments()[0]; - if ("SHOW TABLES".equals(query)) { - return new MockResultSet(SHOW_TABLES_COLUMN_NAMES, SHOW_TABLES_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `messages`".equals(query)) { - return new MockResultSet(DESC_MESSAGES_TABLE_COLUMN_NAMES, DESC_MESSAGES_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `users`".equals(query)) { - return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_USERS_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `ext_users`".equals(query)) { - return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_EXTERNAL_USERS_TABLE_RESULTSET).createResultSet(); - } else if ("DESC FORMATTED `_newTable`".equals(query)) { - return new MockResultSet(DESC_NEW_TABLE_COLUMN_NAMES, DESC_NEW_TABLE_RESULTSET).createResultSet(); - } else { - return new MockResultSet(new String[]{}, new String[][]{new String[]{}}).createResultSet(); - } - }); - when(s.execute(anyString())).thenAnswer((Answer) invocation -> { - executedStatements.add((String) invocation.getArguments()[0]); - return false; - }); - return conn; - } catch (final Exception e) { - e.printStackTrace(); - throw new ProcessException("getConnection failed: " + e); - } - } - - @Override - public String getConnectionURL() { - return "jdbc:fake:" + dbLocation; - } - - List getExecutedStatements() { - return executedStatements; - } - } - - private static class MockResultSet { - String[] colNames; - String[][] data; - int currentRow; - - MockResultSet(String[] colNames, String[][] data) { - this.colNames = colNames; - this.data = data; - currentRow = 0; - } - - ResultSet createResultSet() throws SQLException { - ResultSet rs = mock(ResultSet.class); - when(rs.next()).thenAnswer((Answer) invocation -> (data != null) && (++currentRow <= data.length)); - when(rs.getString(anyInt())).thenAnswer((Answer) invocation -> { - final int index = (int) invocation.getArguments()[0]; - if (index < 1) { - throw new SQLException("Columns start with index 1"); - } - if (currentRow > data.length) { - throw new SQLException("This result set is already closed"); - } - return data[currentRow - 1][index - 1]; - }); - - return rs; - } - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc deleted file mode 100644 index 19cac6e60f..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - { - "namespace" : "org.apache.nifi", - "name" : "outer_record", - "type" : "record", - "fields" : [ { - "name" : "records", - "type" : { - "type" : "array", - "items" : { - "type" : "record", - "name" : "inner_record", - "fields" : [ { - "name" : "name", - "type" : "string" - }, { - "name" : "age", - "type" : "int" - } ] - } - } - } ] -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml deleted file mode 100644 index eefc74ecbe..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - - hadoop.security.authentication - kerberos - - - hadoop.security.authorization - true - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml deleted file mode 100644 index 8f4a91a4ec..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/fake.keytab b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/fake.keytab deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml deleted file mode 100644 index 4d64c951a4..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - fs.default.name - hdfs://hive - - - hive.server2.authentication - KERBEROS - - - hadoop.security.authentication - kerberos - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml deleted file mode 100644 index 7e7f86cf28..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - fs.default.name - file:/// - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf deleted file mode 100644 index 323da39be9..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf +++ /dev/null @@ -1,10 +0,0 @@ -[libdefaults] - default_realm = EXAMPLE.COM - dns_lookup_kdc = false - dns_lookup_realm = false - -[realms] - EXAMPLE.COM = { - kdc = kerberos.example.com - admin_server = kerberos.example.com - } \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc deleted file mode 100644 index 95ef6e4fd0..0000000000 --- a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]}, - {"name": "scale", "type": ["double", "null"]} - ] -} diff --git a/nifi-nar-bundles/nifi-hive-bundle/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/pom.xml index b04ef2b45b..9f62a31d1d 100644 --- a/nifi-nar-bundles/nifi-hive-bundle/pom.xml +++ b/nifi-nar-bundles/nifi-hive-bundle/pom.xml @@ -29,10 +29,6 @@ nifi-hive-services-api nifi-hive-services-api-nar - nifi-hive-processors - nifi-hive-nar - nifi-hive_1_1-processors - nifi-hive_1_1-nar nifi-hive3-processors nifi-hive3-nar nifi-hive-test-utils @@ -104,20 +100,10 @@ ant 1.10.12 - - - xerces - xercesImpl - 2.12.2 - - 1.1.1 - 2.6.2 - 1.2.2 - 2.6.2 3.1.3 ${hive3.version} 1.22.0