mirror of https://github.com/apache/nifi.git
NIFI-5967: Add Hive 1.1 processors
Removed extra Kerberos properties, added LICENSEs to all Hive NARs, removed unnecessary NOTICE entries
This commit is contained in:
parent
8c58d51857
commit
cdf3c69208
|
@ -781,6 +781,23 @@ language governing permissions and limitations under the License. -->
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</profile>
|
</profile>
|
||||||
|
<profile>
    <!-- Opt-in profile that includes the Hive 1.1.x NAR as a dependency.
        It is not active by default (activeByDefault is false) and has to be
        selected by its id, e.g. -Pinclude-hive1_1. The NAR is quite large and
        makes the resulting binary distribution significantly larger
        (150+ MB). -->
    <id>include-hive1_1</id>
    <activation>
        <activeByDefault>false</activeByDefault>
    </activation>
    <dependencies>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive_1_1-nar</artifactId>
            <version>1.9.0-SNAPSHOT</version>
            <type>nar</type>
        </dependency>
    </dependencies>
</profile>
|
||||||
<profile>
|
<profile>
|
||||||
<id>include-hive3</id>
|
<id>include-hive3</id>
|
||||||
<!-- This profile handles the inclusion of Hive 3 artifacts. The NAR
|
<!-- This profile handles the inclusion of Hive 3 artifacts. The NAR
|
||||||
|
|
|
@ -0,0 +1,231 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
APACHE NIFI SUBCOMPONENTS:
|
||||||
|
|
||||||
|
The Apache NiFi project contains subcomponents with separate copyright
|
||||||
|
notices and license terms. Your use of the source code for these
|
||||||
|
subcomponents is subject to the terms and conditions of the following
|
||||||
|
licenses.
|
||||||
|
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
|
||||||
|
under an MIT style license.
|
||||||
|
|
||||||
|
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
|
@ -0,0 +1,31 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.dbcp.hive;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition for Hive 1.1 Database Connection Pooling Service.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
|
||||||
|
@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.")
|
||||||
|
public interface Hive_1_1DBCPService extends HiveDBCPService {
|
||||||
|
public String getConnectionURL();
|
||||||
|
}
|
|
@ -0,0 +1,231 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
APACHE NIFI SUBCOMPONENTS:
|
||||||
|
|
||||||
|
The Apache NiFi project contains subcomponents with separate copyright
|
||||||
|
notices and license terms. Your use of the source code for these
|
||||||
|
subcomponents is subject to the terms and conditions of the following
|
||||||
|
licenses.
|
||||||
|
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
|
||||||
|
under an MIT style license.
|
||||||
|
|
||||||
|
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
|
@ -0,0 +1,49 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<!-- Module coordinates: inherits from the nifi-hive-bundle parent and is packaged as a NAR named nifi-hive_1_1-nar. -->
<parent>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-hive-bundle</artifactId>
|
||||||
|
<version>1.9.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>nifi-hive_1_1-nar</artifactId>
|
||||||
|
<!-- Same version string as the parent declaration above; keep the two in sync. -->
<version>1.9.0-SNAPSHOT</version>
|
||||||
|
<packaging>nar</packaging>
|
||||||
|
<!-- Build properties for this NAR module. maven.javadoc.skip turns off Javadoc generation;
     source.skip presumably turns off source-jar attachment (TODO confirm against the
     source plugin configuration inherited from the parent POMs). -->
<properties>
|
||||||
|
<maven.javadoc.skip>true</maven.javadoc.skip>
|
||||||
|
<source.skip>true</source.skip>
|
||||||
|
<!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
<!-- NOTE(review): hive11.hadoop.version is not defined in this file; presumably it is declared in a parent POM - verify. -->
|
||||||
|
<hadoop.version>${hive11.hadoop.version}</hadoop.version>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<!-- Dependencies of this NAR: the Hive services API NAR (declared with type "nar") and the
     Hive 1.1 processors artifact (no explicit type, so Maven's default "jar" applies). -->
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-hive-services-api-nar</artifactId>
|
||||||
|
<version>1.9.0-SNAPSHOT</version>
|
||||||
|
<type>nar</type>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-hive_1_1-processors</artifactId>
|
||||||
|
<version>1.9.0-SNAPSHOT</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,231 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
APACHE NIFI SUBCOMPONENTS:
|
||||||
|
|
||||||
|
The Apache NiFi project contains subcomponents with separate copyright
|
||||||
|
notices and license terms. Your use of the source code for these
|
||||||
|
subcomponents is subject to the terms and conditions of the following
|
||||||
|
licenses.
|
||||||
|
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
|
||||||
|
under an MIT style license.
|
||||||
|
|
||||||
|
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
|
@ -0,0 +1,283 @@
|
||||||
|
nifi-hive_1_1-nar
|
||||||
|
Copyright 2014-2018 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed at
|
||||||
|
The Apache Software Foundation (http://www.apache.org/).
|
||||||
|
|
||||||
|
This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm):
|
||||||
|
Copyright 2015 The Apache Software Foundation
|
||||||
|
The derived work is adapted from
|
||||||
|
org/apache/storm/hive/common/HiveWriter.java
|
||||||
|
org/apache/storm/hive/common/HiveOptions.java
|
||||||
|
and can be found in the org.apache.nifi.util.hive package
|
||||||
|
|
||||||
|
===========================================
|
||||||
|
Apache Software License v2
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
The following binary components are provided under the Apache Software License v2
|
||||||
|
|
||||||
|
(ASLv2) Apache Ant
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Ant
|
||||||
|
Copyright 1999-2016 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons Codec
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons Codec
|
||||||
|
Copyright 2002-2014 The Apache Software Foundation
|
||||||
|
|
||||||
|
src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
|
||||||
|
contains test data from http://aspell.net/test/orig/batch0.tab.
|
||||||
|
Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
The content of package org.apache.commons.codec.language.bm has been translated
|
||||||
|
from the original php source code available at http://stevemorse.org/phoneticinfo.htm
|
||||||
|
with permission from the original authors.
|
||||||
|
Original source copyright:
|
||||||
|
Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons DBCP
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons DBCP
|
||||||
|
Copyright 2001-2015 The Apache Software Foundation.
|
||||||
|
|
||||||
|
(ASLv2) Apache HttpComponents
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache HttpComponents Client
|
||||||
|
Copyright 1999-2016 The Apache Software Foundation
|
||||||
|
Apache HttpComponents Core - HttpCore
|
||||||
|
Copyright 2006-2009 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons Logging
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons Logging
|
||||||
|
Copyright 2003-2014 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons Pool
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons Pool
|
||||||
|
Copyright 1999-2009 The Apache Software Foundation.
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons IO
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons IO
|
||||||
|
Copyright 2002-2016 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Hive
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Hive
|
||||||
|
Copyright 2008-2015 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed by The Apache Software
|
||||||
|
Foundation (http://www.apache.org/).
|
||||||
|
|
||||||
|
This product includes Jersey (https://jersey.java.net/)
|
||||||
|
Copyright (c) 2010-2014 Oracle and/or its affiliates.
|
||||||
|
|
||||||
|
This project includes software copyrighted by Microsoft Corporation and
|
||||||
|
licensed under the Apache License, Version 2.0.
|
||||||
|
|
||||||
|
This project includes software copyrighted by Dell SecureWorks and
|
||||||
|
licensed under the Apache License, Version 2.0.
|
||||||
|
|
||||||
|
(ASLv2) Jackson JSON processor
|
||||||
|
The following NOTICE information applies:
|
||||||
|
# Jackson JSON processor
|
||||||
|
|
||||||
|
Jackson is a high-performance, Free/Open Source JSON processing library.
|
||||||
|
It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
|
||||||
|
been in development since 2007.
|
||||||
|
It is currently developed by a community of developers, as well as supported
|
||||||
|
commercially by FasterXML.com.
|
||||||
|
|
||||||
|
## Licensing
|
||||||
|
|
||||||
|
Jackson core and extension components may be licensed under different licenses.
|
||||||
|
To find the details that apply to this artifact see the accompanying LICENSE file.
|
||||||
|
For more information, including possible other licensing options, contact
|
||||||
|
FasterXML.com (http://fasterxml.com).
|
||||||
|
|
||||||
|
## Credits
|
||||||
|
|
||||||
|
A list of contributors may be found from CREDITS file, which is included
|
||||||
|
in some artifacts (usually source distributions); but is always available
|
||||||
|
from the source code management (SCM) system project uses.
|
||||||
|
|
||||||
|
(ASLv2) BoneCP
|
||||||
|
The following NOTICE information applies:
|
||||||
|
BoneCP
|
||||||
|
Copyright 2010 Wallace Wadge
|
||||||
|
|
||||||
|
(ASLv2) Apache Hadoop
|
||||||
|
The following NOTICE information applies:
|
||||||
|
The binary distribution of this product bundles binaries of
|
||||||
|
org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the
|
||||||
|
following notices:
|
||||||
|
* Copyright 2011 Dain Sundstrom <dain@iq80.com>
|
||||||
|
* Copyright 2011 FuseSource Corp. http://fusesource.com
|
||||||
|
|
||||||
|
The binary distribution of this product bundles binaries of
|
||||||
|
org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
|
||||||
|
which has the following notices:
|
||||||
|
* This product includes software developed by FuseSource Corp.
|
||||||
|
http://fusesource.com
|
||||||
|
* This product includes software developed at
|
||||||
|
Progress Software Corporation and/or its subsidiaries or affiliates.
|
||||||
|
* This product includes software developed by IBM Corporation and others.
|
||||||
|
|
||||||
|
(ASLv2) Apache Commons Lang
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Commons Lang
|
||||||
|
Copyright 2001-2015 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Curator
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Curator
|
||||||
|
Copyright 2013-2014 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Derby
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Derby
|
||||||
|
Copyright 2004-2014 Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils,
|
||||||
|
the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation.
|
||||||
|
|
||||||
|
(ASLv2) Apache DS
|
||||||
|
The following NOTICE information applies:
|
||||||
|
ApacheDS
|
||||||
|
Copyright 2003-2015 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Apache Geronimo
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Geronimo
|
||||||
|
Copyright 2003-2008 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) HTrace Core
|
||||||
|
The following NOTICE information applies:
|
||||||
|
In addition, this product includes software dependencies. See
|
||||||
|
the accompanying LICENSE.txt for a listing of dependencies
|
||||||
|
that are NOT Apache licensed (with pointers to their licensing)
|
||||||
|
|
||||||
|
Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
|
||||||
|
is a distributed tracing system that is Apache 2.0 Licensed.
|
||||||
|
Copyright 2012 Twitter, Inc.
|
||||||
|
|
||||||
|
(ASLv2) Jettison
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Copyright 2006 Envoi Solutions LLC
|
||||||
|
|
||||||
|
(ASLv2) Jetty
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Jetty Web Container
|
||||||
|
Copyright 1995-2017 Mort Bay Consulting Pty Ltd.
|
||||||
|
|
||||||
|
(ASLv2) Apache log4j
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache log4j
|
||||||
|
Copyright 2007 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Parquet MR
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Parquet MR
|
||||||
|
Copyright 2012 Twitter, Inc.
|
||||||
|
|
||||||
|
This project includes code from https://github.com/lemire/JavaFastPFOR
|
||||||
|
parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java
|
||||||
|
Apache License Version 2.0 http://www.apache.org/licenses/.
|
||||||
|
(c) Daniel Lemire, http://lemire.me/en/
|
||||||
|
|
||||||
|
(ASLv2) Apache Thrift
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Thrift
|
||||||
|
Copyright 2006-2010 The Apache Software Foundation.
|
||||||
|
|
||||||
|
(ASLv2) Apache Twill
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Twill
|
||||||
|
Copyright 2013-2016 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Dropwizard Metrics
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Metrics
|
||||||
|
Copyright 2010-2013 Coda Hale and Yammer, Inc.
|
||||||
|
|
||||||
|
This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
|
||||||
|
LongAdder), which was released with the following comments:
|
||||||
|
|
||||||
|
Written by Doug Lea with assistance from members of JCP JSR-166
|
||||||
|
Expert Group and released to the public domain, as explained at
|
||||||
|
http://creativecommons.org/publicdomain/zero/1.0/
|
||||||
|
|
||||||
|
(ASLv2) Joda Time
|
||||||
|
The following NOTICE information applies:
|
||||||
|
This product includes software developed by
|
||||||
|
Joda.org (http://www.joda.org/).
|
||||||
|
|
||||||
|
(ASLv2) The Netty Project
|
||||||
|
The following NOTICE information applies:
|
||||||
|
The Netty Project
|
||||||
|
Copyright 2011 The Netty Project
|
||||||
|
|
||||||
|
(ASLv2) Apache Tomcat
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache Tomcat
|
||||||
|
Copyright 2007 The Apache Software Foundation
|
||||||
|
|
||||||
|
Java Management Extensions (JMX) support is provided by
|
||||||
|
the MX4J package, which is open source software. The
|
||||||
|
original software and related information is available
|
||||||
|
at http://mx4j.sourceforge.net.
|
||||||
|
|
||||||
|
Java compilation software for JSP pages is provided by Eclipse,
|
||||||
|
which is open source software. The original software and
|
||||||
|
related information is available at
|
||||||
|
http://www.eclipse.org.
|
||||||
|
|
||||||
|
(ASLv2) Apache ZooKeeper
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache ZooKeeper
|
||||||
|
Copyright 2009-2012 The Apache Software Foundation
|
||||||
|
|
||||||
|
(ASLv2) Google GSON
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Copyright 2008 Google Inc.
|
||||||
|
|
||||||
|
(ASLv2) JPam
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Copyright 2003-2006 Greg Luck
|
||||||
|
|
||||||
|
************************
|
||||||
|
Common Development and Distribution License 1.1
|
||||||
|
************************
|
||||||
|
|
||||||
|
The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details.
|
||||||
|
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/)
|
||||||
|
(CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail)
|
||||||
|
|
||||||
|
|
||||||
|
************************
|
||||||
|
Common Development and Distribution License 1.0
|
||||||
|
************************
|
||||||
|
|
||||||
|
The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details.
|
||||||
|
|
||||||
|
(CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available)
|
||||||
|
(CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided)
|
||||||
|
(CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
|
||||||
|
(CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net)
|
||||||
|
|
||||||
|
*****************
|
||||||
|
Public Domain
|
||||||
|
*****************
|
||||||
|
|
||||||
|
The following binary components are provided to the 'Public Domain'. See project link for details.
|
||||||
|
|
||||||
|
(Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/)
|
|
@ -0,0 +1,121 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Module is a child of the shared Hive bundle POM. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-hive-bundle</artifactId>
        <version>1.9.0-SNAPSHOT</version>
    </parent>

    <!-- Plain jar holding the Hive 1.1 processors and controller service implementation. -->
    <artifactId>nifi-hive_1_1-processors</artifactId>
    <packaging>jar</packaging>

    <properties>
        <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
        <!-- NOTE(review): hive11.hadoop.version and hive11.version are not defined in this file; presumably they come from the parent POM - confirm. -->
        <hadoop.version>${hive11.hadoop.version}</hadoop.version>
    </properties>

    <dependencies>
        <!-- NiFi framework API and processor utilities -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-api</artifactId>
            <version>1.9.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-processor-utils</artifactId>
            <version>1.9.0-SNAPSHOT</version>
        </dependency>
        <!-- Controller service APIs: 'provided' scope, so they are compiled against but not packaged with this jar. -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-dbcp-service-api</artifactId>
            <version>1.9.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-services-api</artifactId>
            <version>1.9.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <!-- NOTE(review): no <version> is declared here, unlike the sibling NiFi dependencies; presumably it is managed by a parent dependencyManagement section - confirm. -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-kerberos-credentials-service-api</artifactId>
            <scope>provided</scope>
        </dependency>
        <!-- Hive 1.1 client libraries -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive11.version}</version>
            <exclusions>
                <!-- NOTE(review): org.json:json is excluded from the transitive graph; presumably for licensing reasons - confirm. -->
                <exclusion>
                    <groupId>org.json</groupId>
                    <artifactId>json</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-streaming</artifactId>
            <version>${hive11.version}</version>
            <exclusions>
                <!-- NOTE(review): the log4j SLF4J binding is excluded; presumably to avoid clashing with the logging binding supplied by the runtime - confirm. -->
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-core</artifactId>
            <version>${hive11.version}</version>
        </dependency>
        <!-- Hadoop client at the version overridden in <properties> above; jsr305 is excluded from its transitive graph. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.code.findbugs</groupId>
                    <artifactId>jsr305</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hadoop-utils</artifactId>
            <version>1.9.0-SNAPSHOT</version>
        </dependency>
        <!-- NOTE(review): presumably the replacement for the annotations lost through the jsr305 exclusion above - confirm. -->
        <dependency>
            <groupId>com.github.stephenc.findbugs</groupId>
            <artifactId>findbugs-annotations</artifactId>
            <version>1.3.9-1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-text</artifactId>
            <version>1.4</version>
        </dependency>
        <!-- Test-only: NiFi mock framework. -->
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock</artifactId>
            <version>1.9.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
|
|
@ -0,0 +1,369 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.dbcp.hive;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.commons.dbcp.BasicDataSource;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hive.jdbc.HiveDriver;
|
||||||
|
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnDisabled;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnEnabled;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.components.ValidationContext;
|
||||||
|
import org.apache.nifi.components.ValidationResult;
|
||||||
|
import org.apache.nifi.controller.AbstractControllerService;
|
||||||
|
import org.apache.nifi.controller.ConfigurationContext;
|
||||||
|
import org.apache.nifi.hadoop.SecurityUtil;
|
||||||
|
import org.apache.nifi.kerberos.KerberosCredentialsService;
|
||||||
|
import org.apache.nifi.logging.ComponentLog;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.nifi.reporting.InitializationException;
|
||||||
|
import org.apache.nifi.util.hive.AuthenticationFailedException;
|
||||||
|
import org.apache.nifi.util.hive.HiveConfigurator;
|
||||||
|
import org.apache.nifi.util.hive.HiveUtils;
|
||||||
|
import org.apache.nifi.util.hive.ValidationResources;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.UndeclaredThrowableException;
|
||||||
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
import org.apache.nifi.controller.ControllerServiceInitializationContext;
|
||||||
|
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation for Database Connection Pooling Service used for Apache Hive 1.1
|
||||||
|
* connections. Apache DBCP is used for connection pooling functionality.
|
||||||
|
*/
|
||||||
|
@RequiresInstanceClassLoading
|
||||||
|
@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
|
||||||
|
@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.")
|
||||||
|
public class Hive_1_1ConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||||
|
// NOTE(review): this constant is not referenced anywhere else in this class; possibly a leftover
// from removed explicit principal/keytab properties - confirm before deleting.
private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB";

// JDBC URL of the Hive server; required. Its evaluated value is also what getConnectionURL() reports.
public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder()
        .name("hive-db-connect-url")
        .displayName("Database Connection URL")
        .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters."
                + " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included "
                + "as a connection parameter when connecting to a secure Hive server.")
        .defaultValue(null)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .required(true)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Optional list of Hive configuration files. When set, customValidate() passes it to HiveConfigurator.validate(),
// and onConfigured() always loads the Hive Configuration from it.
public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
        .name("hive-config-resources")
        .displayName("Hive Configuration Resources")
        .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop "
                + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication "
                + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.")
        .required(false)
        .addValidator(HiveUtils.createMultipleFilesExistValidator())
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// User name handed to the pooled data source (BasicDataSource.setUsername) in onConfigured().
public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder()
        .name("hive-db-user")
        .displayName("Database User")
        .description("Database user name")
        .defaultValue(null)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Password handed to the pooled data source; marked sensitive.
public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder()
        .name("hive-db-password")
        .displayName("Password")
        .description("The password for the database user")
        .defaultValue(null)
        .required(false)
        .sensitive(true)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Read as a time period in milliseconds and passed to BasicDataSource.setMaxWait in onConfigured().
// NOTE(review): the description advertises "-1" but the value is validated with TIME_PERIOD_VALIDATOR
// and read via asTimePeriod(); verify that "-1" is actually accepted.
public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder()
        .name("hive-max-wait-time")
        .displayName("Max Wait Time")
        .description("The maximum amount of time that the pool will wait (when there are no available connections) "
                + " for a connection to be returned before failing, or -1 to wait indefinitely. ")
        .defaultValue("500 millis")
        .required(true)
        .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Read as an integer and passed to BasicDataSource.setMaxActive in onConfigured().
public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder()
        .name("hive-max-total-connections")
        .displayName("Max Total Connections")
        .description("The maximum number of active connections that can be allocated from this pool at the same time, "
                + "or negative for no limit.")
        .defaultValue("8")
        .required(true)
        .addValidator(StandardValidators.INTEGER_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Optional; when non-empty, onConfigured() sets it on the data source and turns on test-on-borrow.
public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder()
        .name("Validation-query")
        .displayName("Validation query")
        .description("Validation query used to validate connections before returning them. "
                + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. "
                + "NOTE: Using validation may have a performance penalty.")
        .required(false)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
        .build();

// Optional source of the Kerberos principal and keytab used by customValidate() and onConfigured().
// Package-private, unlike the other descriptors.
static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder()
        .name("kerberos-credentials-service")
        .displayName("Kerberos Credentials Service")
        .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos")
        .identifiesControllerService(KerberosCredentialsService.class)
        .required(false)
        .build();


// Supported property descriptors; populated once in init().
private List<PropertyDescriptor> properties;

// Evaluated DATABASE_URL; holds the placeholder "unknown" until onConfigured() has run.
private String connectionUrl = "unknown";

// Holder of cached Configuration information so validation does not reload the same config over and over
private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();

// Connection pool; created in onConfigured() and closed in shutdown().
private volatile BasicDataSource dataSource;

// Helper used to load/validate the Hive configuration and to perform the Kerberos login.
private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
// Identity assigned by onConfigured() when Hive security is enabled; getConnection() takes the
// plain (non-doAs) path while this is null.
private volatile UserGroupInformation ugi;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void init(final ControllerServiceInitializationContext context) {
|
||||||
|
List<PropertyDescriptor> props = new ArrayList<>();
|
||||||
|
props.add(DATABASE_URL);
|
||||||
|
props.add(HIVE_CONFIGURATION_RESOURCES);
|
||||||
|
props.add(DB_USER);
|
||||||
|
props.add(DB_PASSWORD);
|
||||||
|
props.add(MAX_WAIT_TIME);
|
||||||
|
props.add(MAX_TOTAL_CONNECTIONS);
|
||||||
|
props.add(VALIDATION_QUERY);
|
||||||
|
props.add(KERBEROS_CREDENTIALS_SERVICE);
|
||||||
|
|
||||||
|
properties = props;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the descriptor list that was built in {@code init}.
 *
 * @return the property descriptors supported by this service
 */
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
    return properties;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
|
||||||
|
boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet();
|
||||||
|
|
||||||
|
final List<ValidationResult> problems = new ArrayList<>();
|
||||||
|
|
||||||
|
if (confFileProvided) {
|
||||||
|
final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
|
||||||
|
|
||||||
|
final String resolvedPrincipal;
|
||||||
|
final String resolvedKeytab;
|
||||||
|
if (credentialsService == null) {
|
||||||
|
resolvedPrincipal = null;
|
||||||
|
resolvedKeytab = null;
|
||||||
|
} else {
|
||||||
|
resolvedPrincipal = credentialsService.getPrincipal();
|
||||||
|
resolvedKeytab = credentialsService.getKeytab();
|
||||||
|
}
|
||||||
|
|
||||||
|
final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
|
||||||
|
problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, validationResourceHolder, getLogger()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configures connection pool by creating an instance of the
|
||||||
|
* {@link BasicDataSource} based on configuration provided with
|
||||||
|
* {@link ConfigurationContext}.
|
||||||
|
* <p>
|
||||||
|
* This operation makes no guarantees that the actual connection could be
|
||||||
|
* made since the underlying system may still go off-line during normal
|
||||||
|
* operation of the connection pool.
|
||||||
|
* <p/>
|
||||||
|
* As of Apache NiFi 1.5.0, due to changes made to
|
||||||
|
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking
|
||||||
|
* {@link HiveConfigurator#authenticate(Configuration, String, String)}
|
||||||
|
* to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to
|
||||||
|
* relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from
|
||||||
|
* {@link Hive_1_1ConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent
|
||||||
|
* threads from requesting concurrent relogins. For more information, please read the documentation for
|
||||||
|
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
|
||||||
|
* <p/>
|
||||||
|
* In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
|
||||||
|
* {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
|
||||||
|
* controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
|
||||||
|
* with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
|
||||||
|
* {@link UserGroupInformation} instance. One of these threads could leave the
|
||||||
|
* {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
|
||||||
|
* while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
|
||||||
|
* authentication attempts that would leave the Hive controller service in an unrecoverable state.
|
||||||
|
*
|
||||||
|
* @see SecurityUtil#loginKerberos(Configuration, String, String)
|
||||||
|
* @see HiveConfigurator#authenticate(Configuration, String, String)
|
||||||
|
* @see HiveConfigurator#authenticate(Configuration, String, String, long)
|
||||||
|
* @param context the configuration context
|
||||||
|
* @throws InitializationException if unable to create a database connection
|
||||||
|
*/
|
||||||
|
@OnEnabled
|
||||||
|
public void onConfigured(final ConfigurationContext context) throws InitializationException {
|
||||||
|
|
||||||
|
ComponentLog log = getLogger();
|
||||||
|
|
||||||
|
final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
|
||||||
|
final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
|
||||||
|
final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue();
|
||||||
|
|
||||||
|
// add any dynamic properties to the Hive configuration
|
||||||
|
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
|
||||||
|
final PropertyDescriptor descriptor = entry.getKey();
|
||||||
|
if (descriptor.isDynamic()) {
|
||||||
|
hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final String drv = HiveDriver.class.getName();
|
||||||
|
if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
|
||||||
|
final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
|
||||||
|
|
||||||
|
final String resolvedPrincipal;
|
||||||
|
final String resolvedKeytab;
|
||||||
|
if (credentialsService == null) {
|
||||||
|
resolvedPrincipal = null;
|
||||||
|
resolvedKeytab = null;
|
||||||
|
} else {
|
||||||
|
resolvedPrincipal = credentialsService.getPrincipal();
|
||||||
|
resolvedKeytab = credentialsService.getKeytab();
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
|
||||||
|
|
||||||
|
try {
|
||||||
|
ugi = hiveConfigurator.authenticate(hiveConfig, resolvedPrincipal, resolvedKeytab);
|
||||||
|
} catch (AuthenticationFailedException ae) {
|
||||||
|
log.error(ae.getMessage(), ae);
|
||||||
|
throw new InitializationException(ae);
|
||||||
|
}
|
||||||
|
|
||||||
|
getLogger().info("Successfully logged in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
|
||||||
|
}
|
||||||
|
|
||||||
|
final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue();
|
||||||
|
final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue();
|
||||||
|
final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
|
||||||
|
final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger();
|
||||||
|
|
||||||
|
dataSource = new BasicDataSource();
|
||||||
|
dataSource.setDriverClassName(drv);
|
||||||
|
|
||||||
|
connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue();
|
||||||
|
|
||||||
|
dataSource.setMaxWait(maxWaitMillis);
|
||||||
|
dataSource.setMaxActive(maxTotal);
|
||||||
|
|
||||||
|
if (validationQuery != null && !validationQuery.isEmpty()) {
|
||||||
|
dataSource.setValidationQuery(validationQuery);
|
||||||
|
dataSource.setTestOnBorrow(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
dataSource.setUrl(connectionUrl);
|
||||||
|
dataSource.setUsername(user);
|
||||||
|
dataSource.setPassword(passw);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shutdown pool, close all open connections.
|
||||||
|
*/
|
||||||
|
@OnDisabled
|
||||||
|
public void shutdown() {
|
||||||
|
try {
|
||||||
|
if(dataSource != null) {
|
||||||
|
dataSource.close();
|
||||||
|
}
|
||||||
|
} catch (final SQLException e) {
|
||||||
|
throw new ProcessException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Connection getConnection() throws ProcessException {
|
||||||
|
try {
|
||||||
|
if (ugi != null) {
|
||||||
|
synchronized(this) {
|
||||||
|
/*
|
||||||
|
* Make sure that only one thread can request that the UGI relogin at a time. This
|
||||||
|
* explicit relogin attempt is necessary due to the Hive client/thrift not implicitly handling
|
||||||
|
* the acquisition of a new TGT after the current one has expired.
|
||||||
|
* https://issues.apache.org/jira/browse/NIFI-5134
|
||||||
|
*/
|
||||||
|
ugi.checkTGTAndReloginFromKeytab();
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return ugi.doAs((PrivilegedExceptionAction<Connection>) () -> dataSource.getConnection());
|
||||||
|
} catch (UndeclaredThrowableException e) {
|
||||||
|
Throwable cause = e.getCause();
|
||||||
|
if (cause instanceof SQLException) {
|
||||||
|
throw (SQLException) cause;
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
getLogger().info("Simple Authentication");
|
||||||
|
return dataSource.getConnection();
|
||||||
|
}
|
||||||
|
} catch (SQLException | IOException | InterruptedException e) {
|
||||||
|
getLogger().error("Error getting Hive connection", e);
|
||||||
|
throw new ProcessException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "HiveConnectionPool[id=" + getIdentifier() + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the connection URL evaluated in {@code onConfigured}.
 *
 * @return the configured connection URL, or the initial value "unknown" if the service has not been configured yet
 */
@Override
public String getConnectionURL() {
    return connectionUrl;
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,344 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hive;
|
||||||
|
|
||||||
|
import org.antlr.runtime.tree.CommonTree;
|
||||||
|
import org.apache.hadoop.hive.ql.parse.ASTNode;
|
||||||
|
import org.apache.hadoop.hive.ql.parse.ParseDriver;
|
||||||
|
import org.apache.hadoop.hive.ql.parse.ParseException;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||||
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
|
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
|
||||||
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
|
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.nifi.stream.io.StreamUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.sql.Date;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.SQLDataException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Time;
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.sql.Types;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An abstract base class for HiveQL processors to share common data, methods, etc.
|
||||||
|
*/
|
||||||
|
public abstract class AbstractHive_1_1QLProcessor extends AbstractSessionFactoryProcessor {
|
||||||
|
|
||||||
|
protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
|
||||||
|
protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");
|
||||||
|
static String ATTR_INPUT_TABLES = "query.input.tables";
|
||||||
|
static String ATTR_OUTPUT_TABLES = "query.output.tables";
|
||||||
|
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
|
||||||
|
.name("Hive Database Connection Pooling Service")
|
||||||
|
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
|
||||||
|
.required(true)
|
||||||
|
.identifiesControllerService(Hive_1_1DBCPService.class)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-charset")
|
||||||
|
.displayName("Character Set")
|
||||||
|
.description("Specifies the character set of the record data.")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue("UTF-8")
|
||||||
|
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the HiveQL statement that should be executed for the given FlowFile
|
||||||
|
*
|
||||||
|
* @param session the session that can be used to access the given FlowFile
|
||||||
|
* @param flowFile the FlowFile whose HiveQL statement should be executed
|
||||||
|
* @return the HiveQL that is associated with the given FlowFile
|
||||||
|
*/
|
||||||
|
protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
|
||||||
|
// Read the HiveQL from the FlowFile's content
|
||||||
|
final byte[] buffer = new byte[(int) flowFile.getSize()];
|
||||||
|
session.read(flowFile, new InputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(final InputStream in) throws IOException {
|
||||||
|
StreamUtils.fillBuffer(in, buffer);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create the PreparedStatement to use for this FlowFile.
|
||||||
|
return new String(buffer, charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ParameterHolder {
|
||||||
|
String attributeName;
|
||||||
|
int jdbcType;
|
||||||
|
String value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
|
||||||
|
*
|
||||||
|
* @param stmt the statement to set the parameters on
|
||||||
|
* @param attributes the attributes from which to derive parameter indices, values, and types
|
||||||
|
* @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
|
||||||
|
*/
|
||||||
|
protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map<String, String> attributes) throws SQLException {
|
||||||
|
|
||||||
|
Map<Integer, ParameterHolder> parmMap = new TreeMap<Integer, ParameterHolder>();
|
||||||
|
|
||||||
|
for (final Map.Entry<String, String> entry : attributes.entrySet()) {
|
||||||
|
final String key = entry.getKey();
|
||||||
|
final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
final int parameterIndex = Integer.parseInt(matcher.group(1));
|
||||||
|
if (parameterIndex >= base && parameterIndex < base + paramCount) {
|
||||||
|
final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
|
||||||
|
if (!isNumeric) {
|
||||||
|
throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType");
|
||||||
|
}
|
||||||
|
|
||||||
|
final String valueAttrName = "hiveql.args." + parameterIndex + ".value";
|
||||||
|
|
||||||
|
ParameterHolder ph = new ParameterHolder();
|
||||||
|
int realIndexLoc = parameterIndex - base +1;
|
||||||
|
|
||||||
|
ph.jdbcType = Integer.parseInt(entry.getValue());
|
||||||
|
ph.value = attributes.get(valueAttrName);
|
||||||
|
ph.attributeName = valueAttrName;
|
||||||
|
|
||||||
|
parmMap.put(realIndexLoc, ph);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Now that's we've retrieved the correct number of parameters and it's sorted, let's set them.
|
||||||
|
for (final Map.Entry<Integer, ParameterHolder> entry : parmMap.entrySet()) {
|
||||||
|
final Integer index = entry.getKey();
|
||||||
|
final ParameterHolder ph = entry.getValue();
|
||||||
|
|
||||||
|
try {
|
||||||
|
setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType);
|
||||||
|
} catch (final NumberFormatException nfe) {
|
||||||
|
throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return base + paramCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the
|
||||||
|
* provided PreparedStatement
|
||||||
|
*
|
||||||
|
* @param stmt the PreparedStatement to set the parameter on
|
||||||
|
* @param attrName the name of the attribute that the parameter is coming from - for logging purposes
|
||||||
|
* @param parameterIndex the index of the HiveQL parameter to set
|
||||||
|
* @param parameterValue the value of the HiveQL parameter to set
|
||||||
|
* @param jdbcType the JDBC Type of the HiveQL parameter to set
|
||||||
|
* @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
|
||||||
|
*/
|
||||||
|
protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException {
|
||||||
|
if (parameterValue == null) {
|
||||||
|
stmt.setNull(parameterIndex, jdbcType);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
switch (jdbcType) {
|
||||||
|
case Types.BIT:
|
||||||
|
case Types.BOOLEAN:
|
||||||
|
stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.TINYINT:
|
||||||
|
stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.SMALLINT:
|
||||||
|
stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.INTEGER:
|
||||||
|
stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.BIGINT:
|
||||||
|
stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.REAL:
|
||||||
|
stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.FLOAT:
|
||||||
|
case Types.DOUBLE:
|
||||||
|
stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.DECIMAL:
|
||||||
|
case Types.NUMERIC:
|
||||||
|
stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
|
||||||
|
break;
|
||||||
|
case Types.DATE:
|
||||||
|
stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
|
||||||
|
break;
|
||||||
|
case Types.TIME:
|
||||||
|
stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
|
||||||
|
break;
|
||||||
|
case Types.TIMESTAMP:
|
||||||
|
stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue)));
|
||||||
|
break;
|
||||||
|
case Types.CHAR:
|
||||||
|
case Types.VARCHAR:
|
||||||
|
case Types.LONGNVARCHAR:
|
||||||
|
case Types.LONGVARCHAR:
|
||||||
|
stmt.setString(parameterIndex, parameterValue);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
stmt.setObject(parameterIndex, parameterValue, jdbcType);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (SQLException e) {
|
||||||
|
// Log which attribute/parameter had an error, then rethrow to be handled at the top level
|
||||||
|
getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static class TableName {
|
||||||
|
private final String database;
|
||||||
|
private final String table;
|
||||||
|
private final boolean input;
|
||||||
|
|
||||||
|
TableName(String database, String table, boolean input) {
|
||||||
|
this.database = database;
|
||||||
|
this.table = table;
|
||||||
|
this.input = input;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDatabase() {
|
||||||
|
return database;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTable() {
|
||||||
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isInput() {
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return database == null || database.isEmpty() ? table : database + '.' + table;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
|
||||||
|
TableName tableName = (TableName) o;
|
||||||
|
|
||||||
|
if (input != tableName.input) return false;
|
||||||
|
if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false;
|
||||||
|
return table.equals(tableName.table);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = database != null ? database.hashCode() : 0;
|
||||||
|
result = 31 * result + table.hashCode();
|
||||||
|
result = 31 * result + (input ? 1 : 0);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Set<TableName> findTableNames(final String query) {
|
||||||
|
final ASTNode node;
|
||||||
|
try {
|
||||||
|
node = new ParseDriver().parse(normalize(query));
|
||||||
|
} catch (ParseException e) {
|
||||||
|
// If failed to parse the query, just log a message, but continue.
|
||||||
|
getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e);
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
final HashSet<TableName> tableNames = new HashSet<>();
|
||||||
|
findTableNames(node, tableNames);
|
||||||
|
return tableNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize query.
|
||||||
|
* Hive resolves prepared statement parameters before executing a query,
|
||||||
|
* see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail.
|
||||||
|
* HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one.
|
||||||
|
* In this normalize method, '?' is replaced to 'x' to avoid that.
|
||||||
|
*/
|
||||||
|
private String normalize(String query) {
|
||||||
|
return query.replace('?', 'x');
|
||||||
|
}
|
||||||
|
|
||||||
|
private void findTableNames(final Object obj, final Set<TableName> tableNames) {
|
||||||
|
if (!(obj instanceof CommonTree)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final CommonTree tree = (CommonTree) obj;
|
||||||
|
final int childCount = tree.getChildCount();
|
||||||
|
if ("TOK_TABNAME".equals(tree.getText())) {
|
||||||
|
final TableName tableName;
|
||||||
|
final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText());
|
||||||
|
switch (childCount) {
|
||||||
|
case 1 :
|
||||||
|
tableName = new TableName(null, tree.getChild(0).getText(), isInput);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount);
|
||||||
|
}
|
||||||
|
// If parent is TOK_TABREF, then it is an input table.
|
||||||
|
tableNames.add(tableName);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < childCount; i++) {
|
||||||
|
findTableNames(tree.getChild(i), tableNames);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Map<String, String> toQueryTableAttributes(Set<TableName> tableNames) {
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
for (TableName tableName : tableNames) {
|
||||||
|
final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES;
|
||||||
|
if (attributes.containsKey(attributeName)) {
|
||||||
|
attributes.put(attributeName, attributes.get(attributeName) + "," + tableName);
|
||||||
|
} else {
|
||||||
|
attributes.put(attributeName, tableName.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,297 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hive;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||||
|
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||||
|
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||||
|
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||||
|
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||||
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
|
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||||
|
import org.apache.nifi.processor.Relationship;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.nifi.processor.util.pattern.ErrorTypes;
|
||||||
|
import org.apache.nifi.processor.util.pattern.ExceptionHandler;
|
||||||
|
import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError;
|
||||||
|
import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles;
|
||||||
|
import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection;
|
||||||
|
import org.apache.nifi.processor.util.pattern.Put;
|
||||||
|
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||||
|
import org.apache.nifi.processor.util.pattern.RoutingResult;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.SQLNonTransientException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@SeeAlso(SelectHive_1_1QL.class)
|
||||||
|
@InputRequirement(Requirement.INPUT_REQUIRED)
|
||||||
|
@Tags({"sql", "hive", "put", "database", "update", "insert"})
|
||||||
|
@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command "
|
||||||
|
+ "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes "
|
||||||
|
+ "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be "
|
||||||
|
+ "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
|
||||||
|
@ReadsAttributes({
|
||||||
|
@ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer "
|
||||||
|
+ "that represents the JDBC Type of the parameter."),
|
||||||
|
@ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as "
|
||||||
|
+ "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.")
|
||||||
|
})
|
||||||
|
@WritesAttributes({
|
||||||
|
@WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||||
|
+ "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."),
|
||||||
|
@WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||||
|
+ "and contains the target table names in 'databaseName.tableName' format.")
|
||||||
|
})
|
||||||
|
public class PutHive_1_1QL extends AbstractHive_1_1QLProcessor {
|
||||||
|
|
||||||
|
public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-batch-size")
|
||||||
|
.displayName("Batch Size")
|
||||||
|
.description("The preferred number of FlowFiles to put to the database in a single transaction")
|
||||||
|
.required(true)
|
||||||
|
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
|
||||||
|
.defaultValue("100")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder()
|
||||||
|
.name("statement-delimiter")
|
||||||
|
.displayName("Statement Delimiter")
|
||||||
|
.description("Statement Delimiter used to separate SQL statements in a multiple statement script")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue(";")
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||||
|
.name("success")
|
||||||
|
.description("A FlowFile is routed to this relationship after the database is successfully updated")
|
||||||
|
.build();
|
||||||
|
public static final Relationship REL_RETRY = new Relationship.Builder()
|
||||||
|
.name("retry")
|
||||||
|
.description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
|
||||||
|
.build();
|
||||||
|
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||||
|
.name("failure")
|
||||||
|
.description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
|
||||||
|
+ "such as an invalid query or an integrity constraint violation")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||||
|
private final static Set<Relationship> relationships;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Will ensure that the list of property descriptors is built only once.
|
||||||
|
* Will also create a Set of relationships
|
||||||
|
*/
|
||||||
|
static {
|
||||||
|
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||||
|
_propertyDescriptors.add(HIVE_DBCP_SERVICE);
|
||||||
|
_propertyDescriptors.add(BATCH_SIZE);
|
||||||
|
_propertyDescriptors.add(CHARSET);
|
||||||
|
_propertyDescriptors.add(STATEMENT_DELIMITER);
|
||||||
|
_propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE);
|
||||||
|
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||||
|
|
||||||
|
Set<Relationship> _relationships = new HashSet<>();
|
||||||
|
_relationships.add(REL_SUCCESS);
|
||||||
|
_relationships.add(REL_FAILURE);
|
||||||
|
_relationships.add(REL_RETRY);
|
||||||
|
relationships = Collections.unmodifiableSet(_relationships);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Put<FunctionContext, Connection> process;
|
||||||
|
private ExceptionHandler<FunctionContext> exceptionHandler;
|
||||||
|
|
||||||
|
@OnScheduled
|
||||||
|
public void constructProcess() {
|
||||||
|
exceptionHandler = new ExceptionHandler<>();
|
||||||
|
exceptionHandler.mapException(e -> {
|
||||||
|
if (e instanceof SQLNonTransientException) {
|
||||||
|
return ErrorTypes.InvalidInput;
|
||||||
|
} else if (e instanceof SQLException) {
|
||||||
|
// Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes
|
||||||
|
int errorCode = ((SQLException) e).getErrorCode();
|
||||||
|
getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode});
|
||||||
|
if (errorCode >= 10000 && errorCode < 20000) {
|
||||||
|
return ErrorTypes.InvalidInput;
|
||||||
|
} else if (errorCode >= 20000 && errorCode < 30000) {
|
||||||
|
return ErrorTypes.InvalidInput;
|
||||||
|
} else if (errorCode >= 30000 && errorCode < 40000) {
|
||||||
|
return ErrorTypes.TemporalInputFailure;
|
||||||
|
} else if (errorCode >= 40000 && errorCode < 50000) {
|
||||||
|
// These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes
|
||||||
|
// a ProcessException, we'll route to failure via an InvalidInput error type.
|
||||||
|
return ErrorTypes.InvalidInput;
|
||||||
|
} else {
|
||||||
|
// Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure
|
||||||
|
// or rolled back depending on the user's setting of Rollback On Failure.
|
||||||
|
return ErrorTypes.TemporalFailure;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return ErrorTypes.UnknownFailure;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger()));
|
||||||
|
|
||||||
|
process = new Put<>();
|
||||||
|
process.setLogger(getLogger());
|
||||||
|
process.initConnection(initConnection);
|
||||||
|
process.fetchFlowFiles(fetchFlowFiles);
|
||||||
|
process.putFlowFile(putFlowFile);
|
||||||
|
process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||||
|
return propertyDescriptors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Relationship> getRelationships() {
|
||||||
|
return relationships;
|
||||||
|
}
|
||||||
|
|
||||||
|
private class FunctionContext extends RollbackOnFailure {
|
||||||
|
final Charset charset;
|
||||||
|
final String statementDelimiter;
|
||||||
|
final long startNanos = System.nanoTime();
|
||||||
|
|
||||||
|
String connectionUrl;
|
||||||
|
|
||||||
|
|
||||||
|
private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) {
|
||||||
|
super(rollbackOnFailure, false);
|
||||||
|
this.charset = charset;
|
||||||
|
this.statementDelimiter = statementDelimiter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private InitConnection<FunctionContext, Connection> initConnection = (context, session, fc, ff) -> {
|
||||||
|
final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class);
|
||||||
|
final Connection connection = dbcpService.getConnection(ff == null ? Collections.emptyMap() : ff.getAttributes());
|
||||||
|
fc.connectionUrl = dbcpService.getConnectionURL();
|
||||||
|
return connection;
|
||||||
|
};
|
||||||
|
|
||||||
|
private FetchFlowFiles<FunctionContext> fetchFlowFiles = (context, session, functionContext, result) -> {
|
||||||
|
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
|
||||||
|
return session.get(batchSize);
|
||||||
|
};
|
||||||
|
|
||||||
|
private Put.PutFlowFile<FunctionContext, Connection> putFlowFile = (context, session, fc, conn, flowFile, result) -> {
|
||||||
|
final String script = getHiveQL(session, flowFile, fc.charset);
|
||||||
|
String regex = "(?<!\\\\)" + Pattern.quote(fc.statementDelimiter);
|
||||||
|
|
||||||
|
String[] hiveQLs = script.split(regex);
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = new HashSet<>();
|
||||||
|
exceptionHandler.execute(fc, flowFile, input -> {
|
||||||
|
int loc = 1;
|
||||||
|
for (String hiveQLStr: hiveQLs) {
|
||||||
|
getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr});
|
||||||
|
|
||||||
|
final String hiveQL = hiveQLStr.trim();
|
||||||
|
if (!StringUtils.isEmpty(hiveQL)) {
|
||||||
|
final PreparedStatement stmt = conn.prepareStatement(hiveQL);
|
||||||
|
|
||||||
|
// Get ParameterMetadata
|
||||||
|
// Hive JDBC Doesn't support this yet:
|
||||||
|
// ParameterMetaData pmd = stmt.getParameterMetaData();
|
||||||
|
// int paramCount = pmd.getParameterCount();
|
||||||
|
int paramCount = StringUtils.countMatches(hiveQL, "?");
|
||||||
|
|
||||||
|
if (paramCount > 0) {
|
||||||
|
loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse hiveQL and extract input/output tables
|
||||||
|
try {
|
||||||
|
tableNames.addAll(findTableNames(hiveQL));
|
||||||
|
} catch (Exception e) {
|
||||||
|
// If failed to parse the query, just log a warning message, but continue.
|
||||||
|
getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute the statement
|
||||||
|
stmt.execute();
|
||||||
|
fc.proceed();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit a Provenance SEND event
|
||||||
|
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos);
|
||||||
|
|
||||||
|
final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames));
|
||||||
|
session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true);
|
||||||
|
result.routeTo(flowFile, REL_SUCCESS);
|
||||||
|
|
||||||
|
}, onFlowFileError(context, session, result));
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
private OnError<FunctionContext, FlowFile> onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) {
|
||||||
|
OnError<FunctionContext, FlowFile> onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY);
|
||||||
|
onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> {
|
||||||
|
switch (r.destination()) {
|
||||||
|
case Failure:
|
||||||
|
getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e);
|
||||||
|
break;
|
||||||
|
case Retry:
|
||||||
|
getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
|
||||||
|
new Object[] {i, e}, e);
|
||||||
|
break;
|
||||||
|
case Self:
|
||||||
|
getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return RollbackOnFailure.createOnError(onFlowFileError);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||||
|
final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean();
|
||||||
|
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||||
|
final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue();
|
||||||
|
final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter);
|
||||||
|
RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,552 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hive;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||||
|
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||||
|
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||||
|
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||||
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
|
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||||
|
import org.apache.nifi.logging.ComponentLog;
|
||||||
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
|
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||||
|
import org.apache.nifi.processor.Relationship;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.nifi.processor.util.pattern.PartialFunctions;
|
||||||
|
import org.apache.nifi.util.StopWatch;
|
||||||
|
import org.apache.nifi.util.hive.CsvOutputOptions;
|
||||||
|
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||||
|
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO;
|
||||||
|
|
||||||
|
@EventDriven
|
||||||
|
@InputRequirement(Requirement.INPUT_ALLOWED)
|
||||||
|
@Tags({"hive", "sql", "select", "jdbc", "query", "database"})
|
||||||
|
@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format."
|
||||||
|
+ " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on "
|
||||||
|
+ "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. "
|
||||||
|
+ "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the "
|
||||||
|
+ "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.")
|
||||||
|
@WritesAttributes({
|
||||||
|
@WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."),
|
||||||
|
@WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."),
|
||||||
|
@WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."),
|
||||||
|
@WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set "
|
||||||
|
+ "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."),
|
||||||
|
@WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of "
|
||||||
|
+ "FlowFiles produced by a single ResultSet. This can be used in conjunction with the "
|
||||||
|
+ "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."),
|
||||||
|
@WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of "
|
||||||
|
+ "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be "
|
||||||
|
+ "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order "
|
||||||
|
+ "FlowFiles were produced"),
|
||||||
|
@WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.")
|
||||||
|
})
|
||||||
|
public class SelectHive_1_1QL extends AbstractHive_1_1QLProcessor {
|
||||||
|
|
||||||
|
public static final String RESULT_ROW_COUNT = "selecthiveql.row.count";
|
||||||
|
|
||||||
|
// Relationships
|
||||||
|
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||||
|
.name("success")
|
||||||
|
.description("Successfully created FlowFile from HiveQL query result set.")
|
||||||
|
.build();
|
||||||
|
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||||
|
.name("failure")
|
||||||
|
.description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-pre-query")
|
||||||
|
.displayName("HiveQL Pre-Query")
|
||||||
|
.description("A semicolon-delimited list of queries executed before the main SQL query is executed. "
|
||||||
|
+ "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. "
|
||||||
|
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||||
|
.required(false)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-query")
|
||||||
|
.displayName("HiveQL Select Query")
|
||||||
|
.description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.")
|
||||||
|
.required(false)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-post-query")
|
||||||
|
.displayName("HiveQL Post-Query")
|
||||||
|
.description("A semicolon-delimited list of queries executed after the main SQL query is executed. "
|
||||||
|
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||||
|
.required(false)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-fetch-size")
|
||||||
|
.displayName("Fetch Size")
|
||||||
|
.description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be "
|
||||||
|
+ "honored and/or exact. If the value specified is zero, then the hint is ignored.")
|
||||||
|
.defaultValue("0")
|
||||||
|
.required(true)
|
||||||
|
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-max-rows")
|
||||||
|
.displayName("Max Rows Per Flow File")
|
||||||
|
.description("The maximum number of result rows that will be included in a single FlowFile. " +
|
||||||
|
"This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.")
|
||||||
|
.defaultValue("0")
|
||||||
|
.required(true)
|
||||||
|
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-max-frags")
|
||||||
|
.displayName("Maximum Number of Fragments")
|
||||||
|
.description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " +
|
||||||
|
"This prevents OutOfMemoryError when this processor ingests huge table.")
|
||||||
|
.defaultValue("0")
|
||||||
|
.required(true)
|
||||||
|
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder()
|
||||||
|
.name("csv-header")
|
||||||
|
.displayName("CSV Header")
|
||||||
|
.description("Include Header in Output")
|
||||||
|
.required(true)
|
||||||
|
.allowableValues("true", "false")
|
||||||
|
.defaultValue("true")
|
||||||
|
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder()
|
||||||
|
.name("csv-alt-header")
|
||||||
|
.displayName("Alternate CSV Header")
|
||||||
|
.description("Comma separated list of header fields")
|
||||||
|
.required(false)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder()
|
||||||
|
.name("csv-delimiter")
|
||||||
|
.displayName("CSV Delimiter")
|
||||||
|
.description("CSV Delimiter used to separate fields")
|
||||||
|
.required(true)
|
||||||
|
.defaultValue(",")
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder()
|
||||||
|
.name("csv-quote")
|
||||||
|
.displayName("CSV Quote")
|
||||||
|
.description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.")
|
||||||
|
.required(true)
|
||||||
|
.allowableValues("true", "false")
|
||||||
|
.defaultValue("true")
|
||||||
|
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder()
|
||||||
|
.name("csv-escape")
|
||||||
|
.displayName("CSV Escape")
|
||||||
|
.description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.")
|
||||||
|
.required(true)
|
||||||
|
.allowableValues("true", "false")
|
||||||
|
.defaultValue("true")
|
||||||
|
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-output-format")
|
||||||
|
.displayName("Output Format")
|
||||||
|
.description("How to represent the records coming from Hive (Avro, CSV, e.g.)")
|
||||||
|
.required(true)
|
||||||
|
.allowableValues(AVRO, CSV)
|
||||||
|
.defaultValue(AVRO)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
/** Immutable, ordered list of the properties this processor supports. */
private final static List<PropertyDescriptor> propertyDescriptors;

/** Immutable set of the relationships this processor can route to. */
private final static Set<Relationship> relationships;

/*
 * Builds the property list and the relationship set exactly once, at class
 * initialization, and publishes them as unmodifiable views.
 */
static {
    final List<PropertyDescriptor> descriptors = new ArrayList<>();
    Collections.addAll(descriptors,
            HIVE_DBCP_SERVICE,
            HIVEQL_PRE_QUERY,
            HIVEQL_SELECT_QUERY,
            HIVEQL_POST_QUERY,
            FETCH_SIZE,
            MAX_ROWS_PER_FLOW_FILE,
            MAX_FRAGMENTS,
            HIVEQL_OUTPUT_FORMAT,
            NORMALIZE_NAMES_FOR_AVRO,
            HIVEQL_CSV_HEADER,
            HIVEQL_CSV_ALT_HEADER,
            HIVEQL_CSV_DELIMITER,
            HIVEQL_CSV_QUOTE,
            HIVEQL_CSV_ESCAPE,
            CHARSET);
    propertyDescriptors = Collections.unmodifiableList(descriptors);

    final Set<Relationship> routes = new HashSet<>();
    Collections.addAll(routes, REL_SUCCESS, REL_FAILURE);
    relationships = Collections.unmodifiableSet(routes);
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||||
|
return propertyDescriptors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Relationship> getRelationships() {
|
||||||
|
return relationships;
|
||||||
|
}
|
||||||
|
|
||||||
|
@OnScheduled
|
||||||
|
public void setup(ProcessContext context) {
|
||||||
|
// If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization
|
||||||
|
if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) {
|
||||||
|
final String errorString = "Either the Select Query must be specified or there must be an incoming connection "
|
||||||
|
+ "providing flowfile(s) containing a SQL select query";
|
||||||
|
getLogger().error(errorString);
|
||||||
|
throw new ProcessException(errorString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||||
|
PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||||
|
FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
|
||||||
|
FlowFile flowfile = null;
|
||||||
|
|
||||||
|
// If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
|
||||||
|
// However, if we have no FlowFile and we have connections coming from other Processors, then
|
||||||
|
// we know that we should run only if we have a FlowFile.
|
||||||
|
if (context.hasIncomingConnection()) {
|
||||||
|
if (fileToProcess == null && context.hasNonLoopConnection()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final ComponentLog logger = getLogger();
|
||||||
|
final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class);
|
||||||
|
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||||
|
|
||||||
|
List<String> preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||||
|
List<String> postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||||
|
|
||||||
|
final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
|
||||||
|
|
||||||
|
// Source the SQL
|
||||||
|
String hqlStatement;
|
||||||
|
|
||||||
|
if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
|
||||||
|
hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||||
|
} else {
|
||||||
|
// If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
|
||||||
|
// If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
|
||||||
|
final StringBuilder queryContents = new StringBuilder();
|
||||||
|
session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
|
||||||
|
hqlStatement = queryContents.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||||
|
final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||||
|
final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
|
||||||
|
? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
|
||||||
|
: 0;
|
||||||
|
final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
|
||||||
|
final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
|
||||||
|
final StopWatch stopWatch = new StopWatch(true);
|
||||||
|
final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
|
||||||
|
final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||||
|
final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||||
|
final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
|
||||||
|
final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
|
||||||
|
final String fragmentIdentifier = UUID.randomUUID().toString();
|
||||||
|
|
||||||
|
try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
|
||||||
|
final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())
|
||||||
|
) {
|
||||||
|
Pair<String,SQLException> failure = executeConfigStatements(con, preQueries);
|
||||||
|
if (failure != null) {
|
||||||
|
// In case of failure, assigning config query to "hqlStatement" to follow current error handling
|
||||||
|
hqlStatement = failure.getLeft();
|
||||||
|
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||||
|
fileToProcess = null;
|
||||||
|
throw failure.getRight();
|
||||||
|
}
|
||||||
|
if (fetchSize != null && fetchSize > 0) {
|
||||||
|
try {
|
||||||
|
st.setFetchSize(fetchSize);
|
||||||
|
} catch (SQLException se) {
|
||||||
|
// Not all drivers support this, just log the error (at debug level) and move on
|
||||||
|
logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
logger.debug("Executing query {}", new Object[]{hqlStatement});
|
||||||
|
if (flowbased) {
|
||||||
|
// Hive JDBC Doesn't Support this yet:
|
||||||
|
// ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
|
||||||
|
// int paramCount = pmd.getParameterCount();
|
||||||
|
|
||||||
|
// Alternate way to determine number of params in SQL.
|
||||||
|
int paramCount = StringUtils.countMatches(hqlStatement, "?");
|
||||||
|
|
||||||
|
if (paramCount > 0) {
|
||||||
|
setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final ResultSet resultSet;
|
||||||
|
|
||||||
|
try {
|
||||||
|
resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement));
|
||||||
|
} catch (SQLException se) {
|
||||||
|
// If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
|
||||||
|
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||||
|
fileToProcess = null;
|
||||||
|
throw se;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fragmentIndex = 0;
|
||||||
|
String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
|
||||||
|
while (true) {
|
||||||
|
final AtomicLong nrOfRows = new AtomicLong(0L);
|
||||||
|
flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
|
||||||
|
if (baseFilename == null) {
|
||||||
|
baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
flowfile = session.write(flowfile, out -> {
|
||||||
|
try {
|
||||||
|
if (AVRO.equals(outputFormat)) {
|
||||||
|
nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
|
||||||
|
} else if (CSV.equals(outputFormat)) {
|
||||||
|
CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
|
||||||
|
nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
|
||||||
|
} else {
|
||||||
|
nrOfRows.set(0L);
|
||||||
|
throw new ProcessException("Unsupported output format: " + outputFormat);
|
||||||
|
}
|
||||||
|
} catch (final SQLException | RuntimeException e) {
|
||||||
|
throw new ProcessException("Error during database query or conversion of records.", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (ProcessException e) {
|
||||||
|
// Add flowfile to results before rethrowing so it will be removed from session in outer catch
|
||||||
|
resultSetFlowFiles.add(flowfile);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
// Set attribute for how many rows were selected
|
||||||
|
attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Set input/output table names by parsing the query
|
||||||
|
attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
|
||||||
|
} catch (Exception e) {
|
||||||
|
// If failed to parse the query, just log a warning message, but continue.
|
||||||
|
getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set MIME type on output document and add extension to filename
|
||||||
|
if (AVRO.equals(outputFormat)) {
|
||||||
|
attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
|
||||||
|
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
|
||||||
|
} else if (CSV.equals(outputFormat)) {
|
||||||
|
attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
|
||||||
|
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxRowsPerFlowFile > 0) {
|
||||||
|
attributes.put("fragment.identifier", fragmentIdentifier);
|
||||||
|
attributes.put("fragment.index", String.valueOf(fragmentIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
flowfile = session.putAllAttributes(flowfile, attributes);
|
||||||
|
|
||||||
|
logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
|
||||||
|
new Object[]{flowfile, nrOfRows.get()});
|
||||||
|
|
||||||
|
if (context.hasIncomingConnection()) {
|
||||||
|
// If the flow file came from an incoming connection, issue a Fetch provenance event
|
||||||
|
session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
|
||||||
|
"Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||||
|
} else {
|
||||||
|
// If we created a flow file from rows received from Hive, issue a Receive provenance event
|
||||||
|
session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||||
|
}
|
||||||
|
resultSetFlowFiles.add(flowfile);
|
||||||
|
} else {
|
||||||
|
// If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on
|
||||||
|
session.remove(flowfile);
|
||||||
|
if (resultSetFlowFiles != null && resultSetFlowFiles.size()>0) {
|
||||||
|
flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size()-1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
fragmentIndex++;
|
||||||
|
if (maxFragments > 0 && fragmentIndex >= maxFragments) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < resultSetFlowFiles.size(); i++) {
|
||||||
|
// Set count on all FlowFiles
|
||||||
|
if (maxRowsPerFlowFile > 0) {
|
||||||
|
resultSetFlowFiles.set(i,
|
||||||
|
session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (final SQLException e) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
failure = executeConfigStatements(con, postQueries);
|
||||||
|
if (failure != null) {
|
||||||
|
hqlStatement = failure.getLeft();
|
||||||
|
if (resultSetFlowFiles != null) {
|
||||||
|
resultSetFlowFiles.forEach(ff -> session.remove(ff));
|
||||||
|
}
|
||||||
|
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||||
|
fileToProcess = null;
|
||||||
|
throw failure.getRight();
|
||||||
|
}
|
||||||
|
|
||||||
|
session.transfer(resultSetFlowFiles, REL_SUCCESS);
|
||||||
|
if (fileToProcess != null) {
|
||||||
|
session.remove(fileToProcess);
|
||||||
|
}
|
||||||
|
} catch (final ProcessException | SQLException e) {
|
||||||
|
logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e});
|
||||||
|
if (flowfile == null) {
|
||||||
|
// This can happen if any exceptions occur while setting up the connection, statement, etc.
|
||||||
|
logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
|
||||||
|
new Object[]{hqlStatement, e});
|
||||||
|
context.yield();
|
||||||
|
} else {
|
||||||
|
if (context.hasIncomingConnection()) {
|
||||||
|
logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
|
||||||
|
new Object[]{hqlStatement, flowfile, e});
|
||||||
|
flowfile = session.penalize(flowfile);
|
||||||
|
} else {
|
||||||
|
logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
|
||||||
|
new Object[]{hqlStatement, e});
|
||||||
|
context.yield();
|
||||||
|
}
|
||||||
|
session.transfer(flowfile, REL_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Executes given queries using pre-defined connection.
|
||||||
|
* Returns null on success, or a query string if failed.
|
||||||
|
*/
|
||||||
|
protected Pair<String,SQLException> executeConfigStatements(final Connection con, final List<String> configQueries){
|
||||||
|
if (configQueries == null || configQueries.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String confSQL : configQueries) {
|
||||||
|
try(final Statement st = con.createStatement()){
|
||||||
|
st.execute(confSQL);
|
||||||
|
} catch (SQLException e) {
|
||||||
|
return Pair.of(confSQL, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<String> getQueries(final String value) {
|
||||||
|
if (value == null || value.length() == 0 || value.trim().length() == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final List<String> queries = new LinkedList<>();
|
||||||
|
for (String query : value.split(";")) {
|
||||||
|
if (query.trim().length() > 0) {
|
||||||
|
queries.add(query.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return queries;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
/**
 * Checked exception that wraps the underlying cause of a failed authentication attempt
 * together with a human-readable reason.
 */
public class AuthenticationFailedException extends Exception {

    /**
     * @param reason description of why authentication failed; becomes the exception message
     * @param cause  the underlying exception that triggered the failure
     */
    public AuthenticationFailedException(String reason, Exception cause) {
        super(reason, cause);
    }
}
|
|
@ -0,0 +1,63 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
public class CsvOutputOptions {
|
||||||
|
|
||||||
|
private boolean header = true;
|
||||||
|
private String altHeader = null;
|
||||||
|
private String delimiter = ",";
|
||||||
|
private boolean quote = false;
|
||||||
|
private boolean escape = true;
|
||||||
|
|
||||||
|
private int maxRowsPerFlowFile = 0;
|
||||||
|
|
||||||
|
public boolean isHeader() {
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAltHeader() {
|
||||||
|
return altHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String getDelimiter() {
|
||||||
|
return delimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isQuote() {
|
||||||
|
return quote;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEscape() {
|
||||||
|
return escape;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxRowsPerFlowFile() {
|
||||||
|
return maxRowsPerFlowFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) {
|
||||||
|
this.header = header;
|
||||||
|
this.altHeader = altHeader;
|
||||||
|
this.delimiter = delimiter;
|
||||||
|
this.quote = quote;
|
||||||
|
this.escape = escape;
|
||||||
|
this.maxRowsPerFlowFile = maxRowsPerFlowFile;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hive.conf.HiveConf;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.nifi.components.ValidationResult;
|
||||||
|
import org.apache.nifi.hadoop.KerberosProperties;
|
||||||
|
import org.apache.nifi.hadoop.SecurityUtil;
|
||||||
|
import org.apache.nifi.logging.ComponentLog;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
public class HiveConfigurator {
|
||||||
|
|
||||||
|
public Collection<ValidationResult> validate(String configFiles, String principal, String keyTab, AtomicReference<ValidationResources> validationResourceHolder, ComponentLog log) {
|
||||||
|
|
||||||
|
final List<ValidationResult> problems = new ArrayList<>();
|
||||||
|
ValidationResources resources = validationResourceHolder.get();
|
||||||
|
|
||||||
|
// if no resources in the holder, or if the holder has different resources loaded,
|
||||||
|
// then load the Configuration and set the new resources in the holder
|
||||||
|
if (resources == null || !configFiles.equals(resources.getConfigResources())) {
|
||||||
|
log.debug("Reloading validation resources");
|
||||||
|
resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
|
||||||
|
validationResourceHolder.set(resources);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Configuration hiveConfig = resources.getConfiguration();
|
||||||
|
|
||||||
|
problems.addAll(KerberosProperties.validatePrincipalAndKeytab(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, log));
|
||||||
|
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HiveConf getConfigurationFromFiles(final String configFiles) {
|
||||||
|
final HiveConf hiveConfig = new HiveConf();
|
||||||
|
if (StringUtils.isNotBlank(configFiles)) {
|
||||||
|
for (final String configFile : configFiles.split(",")) {
|
||||||
|
hiveConfig.addResource(new Path(configFile.trim()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hiveConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void preload(Configuration configuration) {
|
||||||
|
try {
|
||||||
|
FileSystem.get(configuration).close();
|
||||||
|
UserGroupInformation.setConfiguration(configuration);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// Suppress exception as future uses of this configuration will fail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As of Apache NiFi 1.5.0, due to changes made to
|
||||||
|
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
|
||||||
|
* class to authenticate a principal with Kerberos, Hive controller services no longer
|
||||||
|
* attempt relogins explicitly. For more information, please read the documentation for
|
||||||
|
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
|
||||||
|
* <p/>
|
||||||
|
* In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
|
||||||
|
* {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
|
||||||
|
* controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
|
||||||
|
* with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
|
||||||
|
* {@link UserGroupInformation} instance. One of these threads could leave the
|
||||||
|
* {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
|
||||||
|
* while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
|
||||||
|
* authentication attempts that would leave the Hive controller service in an unrecoverable state.
|
||||||
|
*
|
||||||
|
* @see SecurityUtil#loginKerberos(Configuration, String, String)
|
||||||
|
*/
|
||||||
|
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException {
|
||||||
|
UserGroupInformation ugi;
|
||||||
|
try {
|
||||||
|
ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
|
||||||
|
}
|
||||||
|
return ugi;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper
|
||||||
|
* method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a
|
||||||
|
* {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins.
|
||||||
|
*
|
||||||
|
* @see HiveConfigurator#authenticate(Configuration, String, String)
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException {
|
||||||
|
return authenticate(hiveConfig, principal, keyTab);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,463 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
import org.apache.avro.Schema;
|
||||||
|
import org.apache.avro.SchemaBuilder;
|
||||||
|
import org.apache.avro.SchemaBuilder.FieldAssembler;
|
||||||
|
import org.apache.avro.file.DataFileWriter;
|
||||||
|
import org.apache.avro.generic.GenericData;
|
||||||
|
import org.apache.avro.generic.GenericDatumWriter;
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
|
import org.apache.avro.io.DatumWriter;
|
||||||
|
import org.apache.commons.text.StringEscapeUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hive.conf.HiveConf;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.math.BigInteger;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.ResultSetMetaData;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.SQLXML;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static java.sql.Types.ARRAY;
|
||||||
|
import static java.sql.Types.BIGINT;
|
||||||
|
import static java.sql.Types.BINARY;
|
||||||
|
import static java.sql.Types.BIT;
|
||||||
|
import static java.sql.Types.BLOB;
|
||||||
|
import static java.sql.Types.BOOLEAN;
|
||||||
|
import static java.sql.Types.CHAR;
|
||||||
|
import static java.sql.Types.CLOB;
|
||||||
|
import static java.sql.Types.DATE;
|
||||||
|
import static java.sql.Types.DECIMAL;
|
||||||
|
import static java.sql.Types.DOUBLE;
|
||||||
|
import static java.sql.Types.FLOAT;
|
||||||
|
import static java.sql.Types.INTEGER;
|
||||||
|
import static java.sql.Types.JAVA_OBJECT;
|
||||||
|
import static java.sql.Types.LONGNVARCHAR;
|
||||||
|
import static java.sql.Types.LONGVARBINARY;
|
||||||
|
import static java.sql.Types.LONGVARCHAR;
|
||||||
|
import static java.sql.Types.NCHAR;
|
||||||
|
import static java.sql.Types.NUMERIC;
|
||||||
|
import static java.sql.Types.NVARCHAR;
|
||||||
|
import static java.sql.Types.OTHER;
|
||||||
|
import static java.sql.Types.REAL;
|
||||||
|
import static java.sql.Types.ROWID;
|
||||||
|
import static java.sql.Types.SMALLINT;
|
||||||
|
import static java.sql.Types.SQLXML;
|
||||||
|
import static java.sql.Types.STRUCT;
|
||||||
|
import static java.sql.Types.TIME;
|
||||||
|
import static java.sql.Types.TIMESTAMP;
|
||||||
|
import static java.sql.Types.TINYINT;
|
||||||
|
import static java.sql.Types.VARBINARY;
|
||||||
|
import static java.sql.Types.VARCHAR;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* JDBC / HiveQL common functions.
|
||||||
|
*/
|
||||||
|
public class HiveJdbcCommon {
|
||||||
|
|
||||||
|
// Name of the Avro output format.
public static final String AVRO = "Avro";
|
||||||
|
// Name of the CSV output format.
public static final String CSV = "CSV";
|
||||||
|
|
||||||
|
// MIME type string for Avro binary content.
public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary";
|
||||||
|
// MIME type string for CSV content.
public static final String CSV_MIME_TYPE = "text/csv";
|
||||||
|
|
||||||
|
|
||||||
|
// Required property restricted to "true"/"false" (default "false") that asks for
// non-Avro-compatible characters in names to be replaced so a valid Avro record can be built.
public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder()
|
||||||
|
.name("hive-normalize-avro")
|
||||||
|
.displayName("Normalize Table/Column Names")
|
||||||
|
.description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods "
|
||||||
|
+ "will be changed to underscores in order to build a valid Avro record.")
|
||||||
|
.allowableValues("true", "false")
|
||||||
|
.defaultValue("false")
|
||||||
|
.required(true)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException {
|
||||||
|
return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback)
|
||||||
|
throws SQLException, IOException {
|
||||||
|
final Schema schema = createSchema(rs, recordName, convertNames);
|
||||||
|
final GenericRecord rec = new GenericData.Record(schema);
|
||||||
|
|
||||||
|
final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
|
||||||
|
try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
|
||||||
|
dataFileWriter.create(schema, outStream);
|
||||||
|
|
||||||
|
final ResultSetMetaData meta = rs.getMetaData();
|
||||||
|
final int nrOfColumns = meta.getColumnCount();
|
||||||
|
long nrOfRows = 0;
|
||||||
|
while (rs.next()) {
|
||||||
|
if (callback != null) {
|
||||||
|
callback.processRow(rs);
|
||||||
|
}
|
||||||
|
for (int i = 1; i <= nrOfColumns; i++) {
|
||||||
|
final int javaSqlType = meta.getColumnType(i);
|
||||||
|
Object value = rs.getObject(i);
|
||||||
|
|
||||||
|
if (value == null) {
|
||||||
|
rec.put(i - 1, null);
|
||||||
|
|
||||||
|
} else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) {
|
||||||
|
// bytes requires little bit different handling
|
||||||
|
ByteBuffer bb = null;
|
||||||
|
if (value instanceof byte[]) {
|
||||||
|
bb = ByteBuffer.wrap((byte[]) value);
|
||||||
|
} else if (value instanceof ByteBuffer) {
|
||||||
|
bb = (ByteBuffer) value;
|
||||||
|
}
|
||||||
|
if (bb != null) {
|
||||||
|
rec.put(i - 1, bb);
|
||||||
|
} else {
|
||||||
|
throw new IOException("Could not process binary object of type " + value.getClass().getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (value instanceof Byte) {
|
||||||
|
// tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT
|
||||||
|
// But value is returned by JDBC as java.lang.Byte
|
||||||
|
// (at least H2 JDBC works this way)
|
||||||
|
// direct put to avro record results:
|
||||||
|
// org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
|
||||||
|
rec.put(i - 1, ((Byte) value).intValue());
|
||||||
|
|
||||||
|
} else if (value instanceof BigDecimal || value instanceof BigInteger) {
|
||||||
|
// Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
|
||||||
|
rec.put(i - 1, value.toString());
|
||||||
|
|
||||||
|
} else if (value instanceof Number) {
|
||||||
|
// Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned
|
||||||
|
// when the JDBC type is 6 (Float) for example.
|
||||||
|
if (javaSqlType == FLOAT) {
|
||||||
|
value = rs.getFloat(i);
|
||||||
|
} else if (javaSqlType == DOUBLE) {
|
||||||
|
value = rs.getDouble(i);
|
||||||
|
} else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) {
|
||||||
|
value = rs.getInt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
rec.put(i - 1, value);
|
||||||
|
|
||||||
|
} else if (value instanceof Boolean) {
|
||||||
|
rec.put(i - 1, value);
|
||||||
|
} else if (value instanceof java.sql.SQLXML) {
|
||||||
|
rec.put(i - 1, ((java.sql.SQLXML) value).getString());
|
||||||
|
} else {
|
||||||
|
// The different types that we support are numbers (int, long, double, float),
|
||||||
|
// as well as boolean values and Strings. Since Avro doesn't provide
|
||||||
|
// timestamp types, we want to convert those to Strings. So we will cast anything other
|
||||||
|
// than numbers or booleans to strings by using the toString() method.
|
||||||
|
rec.put(i - 1, value.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dataFileWriter.append(rec);
|
||||||
|
nrOfRows += 1;
|
||||||
|
|
||||||
|
if (maxRows > 0 && nrOfRows == maxRows)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nrOfRows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException {
|
||||||
|
return createSchema(rs, null, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a
|
||||||
|
* fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value.
|
||||||
|
*
|
||||||
|
* @param rs The result set to convert to Avro
|
||||||
|
* @param recordName The a priori record name to use if it cannot be determined from the result set.
|
||||||
|
* @param convertNames Whether to convert column/table names to be legal Avro names
|
||||||
|
* @return A Schema object representing the result set converted to an Avro record
|
||||||
|
* @throws SQLException if any error occurs during conversion
|
||||||
|
*/
|
||||||
|
public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException {
|
||||||
|
final ResultSetMetaData meta = rs.getMetaData();
|
||||||
|
final int nrOfColumns = meta.getColumnCount();
|
||||||
|
String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName;
|
||||||
|
try {
|
||||||
|
if (nrOfColumns > 0) {
|
||||||
|
// Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column
|
||||||
|
String firstColumnNameFromMeta = meta.getColumnName(1);
|
||||||
|
int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf(".");
|
||||||
|
if (tableNameDelimiter > -1) {
|
||||||
|
String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter);
|
||||||
|
if (!StringUtils.isBlank(tableNameFromMeta)) {
|
||||||
|
tableName = tableNameFromMeta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (SQLException se) {
|
||||||
|
// Not all drivers support getTableName, so just use the previously-set default
|
||||||
|
}
|
||||||
|
|
||||||
|
if (convertNames) {
|
||||||
|
tableName = normalizeNameForAvro(tableName);
|
||||||
|
}
|
||||||
|
final FieldAssembler<Schema> builder = SchemaBuilder.record(tableName).namespace("any.data").fields();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Some missing Avro types - Decimal, Date types. May need some additional work.
|
||||||
|
*/
|
||||||
|
for (int i = 1; i <= nrOfColumns; i++) {
|
||||||
|
String columnNameFromMeta = meta.getColumnName(i);
|
||||||
|
// Hive returns table.column for column name. Grab the column name as the string after the last period
|
||||||
|
int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
|
||||||
|
String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1);
|
||||||
|
switch (meta.getColumnType(i)) {
|
||||||
|
case CHAR:
|
||||||
|
case LONGNVARCHAR:
|
||||||
|
case LONGVARCHAR:
|
||||||
|
case NCHAR:
|
||||||
|
case NVARCHAR:
|
||||||
|
case VARCHAR:
|
||||||
|
case ARRAY:
|
||||||
|
case STRUCT:
|
||||||
|
case JAVA_OBJECT:
|
||||||
|
case OTHER:
|
||||||
|
case SQLXML:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BIT:
|
||||||
|
case BOOLEAN:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INTEGER:
|
||||||
|
// Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned()
|
||||||
|
boolean signedType = true;
|
||||||
|
try {
|
||||||
|
signedType = meta.isSigned(i);
|
||||||
|
} catch (SQLException se) {
|
||||||
|
// Use signed types as default
|
||||||
|
}
|
||||||
|
if (signedType) {
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
|
||||||
|
} else {
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SMALLINT:
|
||||||
|
case TINYINT:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BIGINT:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
// java.sql.RowId is interface, is seems to be database
|
||||||
|
// implementation specific, let's convert to String
|
||||||
|
case ROWID:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case FLOAT:
|
||||||
|
case REAL:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case DOUBLE:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Did not find direct suitable type, need to be clarified!!!!
|
||||||
|
case DECIMAL:
|
||||||
|
case NUMERIC:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Did not find direct suitable type, need to be clarified!!!!
|
||||||
|
case DATE:
|
||||||
|
case TIME:
|
||||||
|
case TIMESTAMP:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BINARY:
|
||||||
|
case VARBINARY:
|
||||||
|
case LONGVARBINARY:
|
||||||
|
case BLOB:
|
||||||
|
case CLOB:
|
||||||
|
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault();
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.endRecord();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException {
|
||||||
|
return convertToCsvStream(rs, outStream, null, null, outputOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions)
|
||||||
|
throws SQLException, IOException {
|
||||||
|
|
||||||
|
final ResultSetMetaData meta = rs.getMetaData();
|
||||||
|
final int nrOfColumns = meta.getColumnCount();
|
||||||
|
List<String> columnNames = new ArrayList<>(nrOfColumns);
|
||||||
|
|
||||||
|
if (outputOptions.isHeader()) {
|
||||||
|
if (outputOptions.getAltHeader() == null) {
|
||||||
|
for (int i = 1; i <= nrOfColumns; i++) {
|
||||||
|
String columnNameFromMeta = meta.getColumnName(i);
|
||||||
|
// Hive returns table.column for column name. Grab the column name as the string after the last period
|
||||||
|
int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
|
||||||
|
columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
String[] altHeaderNames = outputOptions.getAltHeader().split(",");
|
||||||
|
columnNames = Arrays.asList(altHeaderNames);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write column names as header row
|
||||||
|
outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
|
||||||
|
if (outputOptions.isHeader()) {
|
||||||
|
outStream.write("\n".getBytes(StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over the rows
|
||||||
|
int maxRows = outputOptions.getMaxRowsPerFlowFile();
|
||||||
|
long nrOfRows = 0;
|
||||||
|
while (rs.next()) {
|
||||||
|
if (callback != null) {
|
||||||
|
callback.processRow(rs);
|
||||||
|
}
|
||||||
|
List<String> rowValues = new ArrayList<>(nrOfColumns);
|
||||||
|
for (int i = 1; i <= nrOfColumns; i++) {
|
||||||
|
final int javaSqlType = meta.getColumnType(i);
|
||||||
|
final Object value = rs.getObject(i);
|
||||||
|
|
||||||
|
switch (javaSqlType) {
|
||||||
|
case CHAR:
|
||||||
|
case LONGNVARCHAR:
|
||||||
|
case LONGVARCHAR:
|
||||||
|
case NCHAR:
|
||||||
|
case NVARCHAR:
|
||||||
|
case VARCHAR:
|
||||||
|
String valueString = rs.getString(i);
|
||||||
|
if (valueString != null) {
|
||||||
|
// Removed extra quotes as those are a part of the escapeCsv when required.
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
if (outputOptions.isQuote()) {
|
||||||
|
sb.append("\"");
|
||||||
|
if (outputOptions.isEscape()) {
|
||||||
|
sb.append(StringEscapeUtils.escapeCsv(valueString));
|
||||||
|
} else {
|
||||||
|
sb.append(valueString);
|
||||||
|
}
|
||||||
|
sb.append("\"");
|
||||||
|
rowValues.add(sb.toString());
|
||||||
|
} else {
|
||||||
|
if (outputOptions.isEscape()) {
|
||||||
|
rowValues.add(StringEscapeUtils.escapeCsv(valueString));
|
||||||
|
} else {
|
||||||
|
rowValues.add(valueString);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rowValues.add("");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case ARRAY:
|
||||||
|
case STRUCT:
|
||||||
|
case JAVA_OBJECT:
|
||||||
|
String complexValueString = rs.getString(i);
|
||||||
|
if (complexValueString != null) {
|
||||||
|
rowValues.add(StringEscapeUtils.escapeCsv(complexValueString));
|
||||||
|
} else {
|
||||||
|
rowValues.add("");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SQLXML:
|
||||||
|
if (value != null) {
|
||||||
|
rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString()));
|
||||||
|
} else {
|
||||||
|
rowValues.add("");
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
if (value != null) {
|
||||||
|
rowValues.add(value.toString());
|
||||||
|
} else {
|
||||||
|
rowValues.add("");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write row values
|
||||||
|
outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
|
||||||
|
outStream.write("\n".getBytes(StandardCharsets.UTF_8));
|
||||||
|
nrOfRows++;
|
||||||
|
|
||||||
|
if (maxRows > 0 && nrOfRows == maxRows)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return nrOfRows;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String normalizeNameForAvro(String inputName) {
|
||||||
|
String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_");
|
||||||
|
if (Character.isDigit(normalizedName.charAt(0))) {
|
||||||
|
normalizedName = "_" + normalizedName;
|
||||||
|
}
|
||||||
|
return normalizedName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A callback that lets a caller process each row while the convertToAvroStream() / convertToCsvStream() methods run.
|
||||||
|
* <b>IMPORTANT:</b> An implementation should only work on the row the given ResultSet currently points at.
|
||||||
|
* Advancing the cursor, for example, can cause rows to be skipped during the conversion.
|
||||||
|
*/
|
||||||
|
public interface ResultSetRowCallback {
|
||||||
|
// Called once per row, after the cursor has advanced to it and before its columns are read.
void processRow(ResultSet resultSet) throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Configuration getConfigurationFromFiles(final String configFiles) {
|
||||||
|
final Configuration hiveConfig = new HiveConf();
|
||||||
|
if (StringUtils.isNotBlank(configFiles)) {
|
||||||
|
for (final String configFile : configFiles.split(",")) {
|
||||||
|
hiveConfig.addResource(new Path(configFile.trim()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hiveConfig;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
import org.apache.nifi.components.ValidationResult;
|
||||||
|
import org.apache.nifi.components.Validator;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
public class HiveUtils {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that one or more files exist, as specified in a single property.
|
||||||
|
*/
|
||||||
|
public static Validator createMultipleFilesExistValidator() {
|
||||||
|
return (subject, input, context) -> {
|
||||||
|
if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(input)) {
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).explanation("Expression Language Present").valid(true).build();
|
||||||
|
}
|
||||||
|
final String[] files = input.split("\\s*,\\s*");
|
||||||
|
for (String filename : files) {
|
||||||
|
try {
|
||||||
|
final File file = new File(filename.trim());
|
||||||
|
final boolean valid = file.exists() && file.isFile();
|
||||||
|
if (!valid) {
|
||||||
|
final String message = "File " + file + " does not exist or is not a file";
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build();
|
||||||
|
}
|
||||||
|
} catch (SecurityException e) {
|
||||||
|
final String message = "Unable to access " + filename + " due to " + e.getMessage();
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.util.hive;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary)
|
||||||
|
*/
|
||||||
|
public class ValidationResources {
|
||||||
|
|
||||||
|
private final String configResources;
|
||||||
|
private final Configuration configuration;
|
||||||
|
|
||||||
|
public ValidationResources(String configResources, Configuration configuration) {
|
||||||
|
this.configResources = configResources;
|
||||||
|
this.configuration = configuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getConfigResources() {
|
||||||
|
return configResources;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Configuration getConfiguration() {
|
||||||
|
return configuration;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
org.apache.nifi.dbcp.hive.Hive_1_1ConnectionPool
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
org.apache.nifi.processors.hive.SelectHive_1_1QL
|
||||||
|
org.apache.nifi.processors.hive.PutHive_1_1QL
|
|
@ -0,0 +1,176 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.nifi.dbcp.hive;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.mockito.Matchers.isA;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.UndeclaredThrowableException;
|
||||||
|
import java.security.PrivilegedExceptionAction;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.dbcp.BasicDataSource;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.controller.AbstractControllerService;
|
||||||
|
import org.apache.nifi.hadoop.KerberosProperties;
|
||||||
|
import org.apache.nifi.logging.ComponentLog;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.registry.VariableDescriptor;
|
||||||
|
import org.apache.nifi.reporting.InitializationException;
|
||||||
|
import org.apache.nifi.util.MockConfigurationContext;
|
||||||
|
import org.apache.nifi.util.MockVariableRegistry;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class Hive_1_1ConnectionPoolTest {
|
||||||
|
private UserGroupInformation userGroupInformation;
|
||||||
|
private Hive_1_1ConnectionPool hiveConnectionPool;
|
||||||
|
private BasicDataSource basicDataSource;
|
||||||
|
private ComponentLog componentLog;
|
||||||
|
private File krb5conf = new File("src/test/resources/krb5.conf");
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws Exception {
|
||||||
|
// have to initialize this system property before anything else
|
||||||
|
System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath());
|
||||||
|
System.setProperty("java.security.krb5.realm", "nifi.com");
|
||||||
|
System.setProperty("java.security.krb5.kdc", "nifi.kdc");
|
||||||
|
|
||||||
|
userGroupInformation = mock(UserGroupInformation.class);
|
||||||
|
basicDataSource = mock(BasicDataSource.class);
|
||||||
|
componentLog = mock(ComponentLog.class);
|
||||||
|
|
||||||
|
when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> {
|
||||||
|
try {
|
||||||
|
return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run();
|
||||||
|
} catch (IOException | Error | RuntimeException | InterruptedException e) {
|
||||||
|
throw e;
|
||||||
|
} catch (Throwable e) {
|
||||||
|
throw new UndeclaredThrowableException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
initPool();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initPool() throws Exception {
|
||||||
|
hiveConnectionPool = new Hive_1_1ConnectionPool();
|
||||||
|
|
||||||
|
Field ugiField = Hive_1_1ConnectionPool.class.getDeclaredField("ugi");
|
||||||
|
ugiField.setAccessible(true);
|
||||||
|
ugiField.set(hiveConnectionPool, userGroupInformation);
|
||||||
|
|
||||||
|
Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource");
|
||||||
|
dataSourceField.setAccessible(true);
|
||||||
|
dataSourceField.set(hiveConnectionPool, basicDataSource);
|
||||||
|
|
||||||
|
Field componentLogField = AbstractControllerService.class.getDeclaredField("logger");
|
||||||
|
componentLogField.setAccessible(true);
|
||||||
|
componentLogField.set(hiveConnectionPool, componentLog);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = ProcessException.class)
|
||||||
|
public void testGetConnectionSqlException() throws SQLException {
|
||||||
|
SQLException sqlException = new SQLException("bad sql");
|
||||||
|
when(basicDataSource.getConnection()).thenThrow(sqlException);
|
||||||
|
try {
|
||||||
|
hiveConnectionPool.getConnection();
|
||||||
|
} catch (ProcessException e) {
|
||||||
|
assertEquals(sqlException, e.getCause());
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExpressionLanguageSupport() throws Exception {
|
||||||
|
final String URL = "jdbc:hive2://localhost:10000/default";
|
||||||
|
final String USER = "user";
|
||||||
|
final String PASS = "pass";
|
||||||
|
final int MAX_CONN = 7;
|
||||||
|
final String MAX_WAIT = "10 sec"; // 10000 milliseconds
|
||||||
|
final String CONF = "/path/to/hive-site.xml";
|
||||||
|
hiveConnectionPool = new Hive_1_1ConnectionPool();
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
|
||||||
|
put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}");
|
||||||
|
put(Hive_1_1ConnectionPool.DB_USER, "${username}");
|
||||||
|
put(Hive_1_1ConnectionPool.DB_PASSWORD, "${password}");
|
||||||
|
put(Hive_1_1ConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}");
|
||||||
|
put(Hive_1_1ConnectionPool.MAX_WAIT_TIME, "${maxwait}");
|
||||||
|
put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}");
|
||||||
|
}};
|
||||||
|
|
||||||
|
MockVariableRegistry registry = new MockVariableRegistry();
|
||||||
|
registry.setVariable(new VariableDescriptor("url"), URL);
|
||||||
|
registry.setVariable(new VariableDescriptor("username"), USER);
|
||||||
|
registry.setVariable(new VariableDescriptor("password"), PASS);
|
||||||
|
registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN));
|
||||||
|
registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT);
|
||||||
|
registry.setVariable(new VariableDescriptor("hiveconf"), CONF);
|
||||||
|
|
||||||
|
|
||||||
|
MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
|
||||||
|
hiveConnectionPool.onConfigured(context);
|
||||||
|
|
||||||
|
Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource");
|
||||||
|
dataSourceField.setAccessible(true);
|
||||||
|
basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool);
|
||||||
|
assertEquals(URL, basicDataSource.getUrl());
|
||||||
|
assertEquals(USER, basicDataSource.getUsername());
|
||||||
|
assertEquals(PASS, basicDataSource.getPassword());
|
||||||
|
assertEquals(MAX_CONN, basicDataSource.getMaxActive());
|
||||||
|
assertEquals(10000L, basicDataSource.getMaxWait());
|
||||||
|
assertEquals(URL, hiveConnectionPool.getConnectionURL());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ignore("Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run")
|
||||||
|
@Test(expected = InitializationException.class)
|
||||||
|
public void testKerberosAuthException() throws Exception {
|
||||||
|
final String URL = "jdbc:hive2://localhost:10000/default";
|
||||||
|
final String conf = "src/test/resources/hive-site-security.xml";
|
||||||
|
final String ktab = "src/test/resources/fake.keytab";
|
||||||
|
final String kprinc = "bad@PRINCIPAL.COM";
|
||||||
|
|
||||||
|
KerberosProperties kerbProperties = new KerberosProperties(krb5conf);
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
|
||||||
|
put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}");
|
||||||
|
put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}");
|
||||||
|
put(kerbProperties.getKerberosKeytab(), "${ktab}");
|
||||||
|
put(kerbProperties.getKerberosPrincipal(), "${kprinc}");
|
||||||
|
}};
|
||||||
|
|
||||||
|
MockVariableRegistry registry = new MockVariableRegistry();
|
||||||
|
registry.setVariable(new VariableDescriptor("url"), URL);
|
||||||
|
registry.setVariable(new VariableDescriptor("conf"), conf);
|
||||||
|
registry.setVariable(new VariableDescriptor("ktab"), ktab);
|
||||||
|
registry.setVariable(new VariableDescriptor("kprinc"), kprinc);
|
||||||
|
|
||||||
|
MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
|
||||||
|
hiveConnectionPool.onConfigured(context);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,292 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hive;
|
||||||
|
|
||||||
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
|
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||||
|
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.util.MockProcessContext;
|
||||||
|
import org.apache.nifi.util.MockProcessorInitializationContext;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class TestHiveParser extends AbstractHive_1_1QLProcessor {
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void initialize() {
|
||||||
|
final MockProcessContext processContext = new MockProcessContext(this);
|
||||||
|
final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext);
|
||||||
|
initialize(initializationContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseSelect() {
|
||||||
|
String query = "select a.empid, to_something(b.saraly) from " +
|
||||||
|
"company.emp a inner join default.salary b where a.empid = b.empid";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(2, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||||
|
assertTrue(tableNames.contains(new TableName("default", "salary", true)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseSelectPrepared() {
|
||||||
|
String query = "select empid from company.emp a where a.firstName = ?";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(1, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseLongSelect() {
|
||||||
|
String query = "select\n" +
|
||||||
|
"\n" +
|
||||||
|
" i_item_id,\n" +
|
||||||
|
"\n" +
|
||||||
|
" i_item_desc,\n" +
|
||||||
|
"\n" +
|
||||||
|
" s_state,\n" +
|
||||||
|
"\n" +
|
||||||
|
" count(ss_quantity) as store_sales_quantitycount,\n" +
|
||||||
|
"\n" +
|
||||||
|
" avg(ss_quantity) as store_sales_quantityave,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" +
|
||||||
|
"\n" +
|
||||||
|
" count(sr_return_quantity) as store_returns_quantitycount,\n" +
|
||||||
|
"\n" +
|
||||||
|
" avg(sr_return_quantity) as store_returns_quantityave,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" +
|
||||||
|
"\n" +
|
||||||
|
" count(cs_quantity) as catalog_sales_quantitycount,\n" +
|
||||||
|
"\n" +
|
||||||
|
" avg(cs_quantity) as catalog_sales_quantityave,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" +
|
||||||
|
"\n" +
|
||||||
|
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" +
|
||||||
|
"\n" +
|
||||||
|
"from\n" +
|
||||||
|
"\n" +
|
||||||
|
" store_sales,\n" +
|
||||||
|
"\n" +
|
||||||
|
" store_returns,\n" +
|
||||||
|
"\n" +
|
||||||
|
" catalog_sales,\n" +
|
||||||
|
"\n" +
|
||||||
|
" date_dim d1,\n" +
|
||||||
|
"\n" +
|
||||||
|
" date_dim d2,\n" +
|
||||||
|
"\n" +
|
||||||
|
" date_dim d3,\n" +
|
||||||
|
"\n" +
|
||||||
|
" store,\n" +
|
||||||
|
"\n" +
|
||||||
|
" item\n" +
|
||||||
|
"\n" +
|
||||||
|
"where\n" +
|
||||||
|
"\n" +
|
||||||
|
" d1.d_quarter_name = '2000Q1'\n" +
|
||||||
|
"\n" +
|
||||||
|
" and d1.d_date_sk = ss_sold_date_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and i_item_sk = ss_item_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and s_store_sk = ss_store_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and ss_customer_sk = sr_customer_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and ss_item_sk = sr_item_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and ss_ticket_number = sr_ticket_number\n" +
|
||||||
|
"\n" +
|
||||||
|
" and sr_returned_date_sk = d2.d_date_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||||
|
"\n" +
|
||||||
|
" and sr_customer_sk = cs_bill_customer_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and sr_item_sk = cs_item_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and cs_sold_date_sk = d3.d_date_sk\n" +
|
||||||
|
"\n" +
|
||||||
|
" and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||||
|
"\n" +
|
||||||
|
"group by i_item_id , i_item_desc , s_state\n" +
|
||||||
|
"\n" +
|
||||||
|
"order by i_item_id , i_item_desc , s_state\n" +
|
||||||
|
"\n" +
|
||||||
|
"limit 100";
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(6, tableNames.size());
|
||||||
|
AtomicInteger cnt = new AtomicInteger(0);
|
||||||
|
for (TableName tableName : tableNames) {
|
||||||
|
if (tableName.equals(new TableName(null, "store_sales", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "store_returns", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "catalog_sales", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "date_dim", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "store", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "item", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals(6, cnt.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseSelectInsert() {
|
||||||
|
String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'";
|
||||||
|
|
||||||
|
// The same database.tableName can appear two times for input and output.
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(2, tableNames.size());
|
||||||
|
AtomicInteger cnt = new AtomicInteger(0);
|
||||||
|
tableNames.forEach(tableName -> {
|
||||||
|
if (tableName.equals(new TableName("databaseA", "tableA", false))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName("databaseA", "tableA", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
assertEquals(2, cnt.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseInsert() {
|
||||||
|
String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id";
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(3, tableNames.size());
|
||||||
|
AtomicInteger cnt = new AtomicInteger(0);
|
||||||
|
tableNames.forEach(tableName -> {
|
||||||
|
if (tableName.equals(new TableName("databaseB", "tableB1", false))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "tableA1", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
} else if (tableName.equals(new TableName(null, "tableA2", true))) {
|
||||||
|
cnt.incrementAndGet();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
assertEquals(3, cnt.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseUpdate() {
|
||||||
|
String query = "update table_a set y = 'updated' where x > 100";
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(1, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseDelete() {
|
||||||
|
String query = "delete from table_a where x > 100";
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(1, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseDDL() {
|
||||||
|
String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" +
|
||||||
|
"EmployeeID INT,FirstName STRING, Title STRING,\n" +
|
||||||
|
"State STRING, Laptop STRING)\n" +
|
||||||
|
"COMMENT 'Employee Names'\n" +
|
||||||
|
"STORED AS ORC";
|
||||||
|
|
||||||
|
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(1, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseSetProperty() {
|
||||||
|
String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(0, tableNames.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseSetRole() {
|
||||||
|
String query = "set role all";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(0, tableNames.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseShowRoles() {
|
||||||
|
String query = "show roles";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(0, tableNames.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseMsck() {
|
||||||
|
String query = "msck repair table table_a";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(1, tableNames.size());
|
||||||
|
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void parseAddJar() {
|
||||||
|
String query = "ADD JAR hdfs:///tmp/my_jar.jar";
|
||||||
|
final Set<TableName> tableNames = findTableNames(query);
|
||||||
|
System.out.printf("tableNames=%s\n", tableNames);
|
||||||
|
assertEquals(0, tableNames.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,841 @@
|
||||||
|
package org.apache.nifi.processors.hive;/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.nifi.controller.AbstractControllerService;
|
||||||
|
import org.apache.nifi.dbcp.DBCPService;
|
||||||
|
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||||
|
import org.apache.nifi.reporting.InitializationException;
|
||||||
|
import org.apache.nifi.util.TestRunner;
|
||||||
|
import org.apache.nifi.util.TestRunners;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.sql.Types;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
public class TestPutHive_1_1QL {
|
||||||
|
private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)";
|
||||||
|
private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))";
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TemporaryFolder folder = new TemporaryFolder();
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setup() {
|
||||||
|
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDirectStatements() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes());
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes());
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("George", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailInMiddleWithBadStatementRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
|
||||||
|
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// The 1st one should be routed to success, others should stay in queue.
|
||||||
|
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailAtBeginning() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailAtBeginningRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||||
|
try {
|
||||||
|
runner.run();
|
||||||
|
fail("ProcessException should be thrown");
|
||||||
|
} catch (AssertionError e) {
|
||||||
|
assertTrue(e.getCause() instanceof ProcessException);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailInMiddleWithBadParameterType() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final Map<String, String> goodAttributes = new HashMap<>();
|
||||||
|
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
goodAttributes.put("hiveql.args.1.value", "84");
|
||||||
|
|
||||||
|
final Map<String, String> badAttributes = new HashMap<>();
|
||||||
|
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||||
|
badAttributes.put("hiveql.args.1.value", "hello");
|
||||||
|
|
||||||
|
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, badAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailInMiddleWithBadParameterValue() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final Map<String, String> goodAttributes = new HashMap<>();
|
||||||
|
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
goodAttributes.put("hiveql.args.1.value", "84");
|
||||||
|
|
||||||
|
final Map<String, String> badAttributes = new HashMap<>();
|
||||||
|
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100
|
||||||
|
|
||||||
|
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, badAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailInMiddleWithBadNumberFormat() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersonsAutoId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final Map<String, String> goodAttributes = new HashMap<>();
|
||||||
|
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
goodAttributes.put("hiveql.args.1.value", "84");
|
||||||
|
|
||||||
|
final Map<String, String> badAttributes = new HashMap<>();
|
||||||
|
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
badAttributes.put("hiveql.args.1.value", "NOT_NUMBER");
|
||||||
|
|
||||||
|
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, badAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.enqueue(data, goodAttributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||||
|
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUsingSqlDataTypesWithNegativeValues() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", "-5");
|
||||||
|
attributes.put("hiveql.args.1.value", "84");
|
||||||
|
runner.enqueue("INSERT INTO PERSONS VALUES (1, 'Mark', ?)".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS");
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStatementsWithPreparedParameters() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.clearTransferState();
|
||||||
|
|
||||||
|
attributes.clear();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.1.value", "George");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.2.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("George", rs.getString(2));
|
||||||
|
assertEquals(84, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleStatementsWithinFlowFile() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// should fail because of the semicolon
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS)
|
||||||
|
.forEach(f -> f.assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS"));
|
||||||
|
|
||||||
|
// Now we can check that the values were inserted by the multi-statement script.
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals("Record ID mismatch", 1, rs.getInt(1));
|
||||||
|
assertEquals("Record NAME mismatch", "George", rs.getString(2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; ";
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// should fail because of the semicolon
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
// Now we can check that the values were inserted by the multi-statement script.
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals("Record ID mismatch", 1, rs.getInt(1));
|
||||||
|
assertEquals("Record NAME mismatch", "George\\;", rs.getString(2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithNullParameter() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
|
||||||
|
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
assertEquals(1, rs.getInt(1));
|
||||||
|
assertEquals("Mark", rs.getString(2));
|
||||||
|
assertEquals(0, rs.getInt(3));
|
||||||
|
assertFalse(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvalidStatement() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final File tempDir = folder.getRoot();
|
||||||
|
final File dbDir = new File(tempDir, "db");
|
||||||
|
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
stmt.executeUpdate(createPersons);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; ";
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// should fail because of the table is invalid
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
|
||||||
|
try (final Connection conn = service.getConnection()) {
|
||||||
|
try (final Statement stmt = conn.createStatement()) {
|
||||||
|
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||||
|
assertTrue(rs.next());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final DBCPService service = new SQLExceptionService(null);
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||||
|
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// should fail because there isn't a valid connection and tables don't exist.
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final DBCPService service = new SQLExceptionService(null);
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||||
|
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
try {
|
||||||
|
runner.run();
|
||||||
|
fail("Should throw ProcessException");
|
||||||
|
} catch (AssertionError e) {
|
||||||
|
assertTrue(e.getCause() instanceof ProcessException);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnknownFailure() throws InitializationException, ProcessException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final SQLExceptionService service = new SQLExceptionService(null);
|
||||||
|
service.setErrorCode(2);
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||||
|
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
// should fail because there isn't a valid connection and tables don't exist.
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||||
|
final SQLExceptionService service = new SQLExceptionService(null);
|
||||||
|
service.setErrorCode(0);
|
||||||
|
runner.addControllerService("dbcp", service);
|
||||||
|
runner.enableControllerService(service);
|
||||||
|
|
||||||
|
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||||
|
|
||||||
|
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||||
|
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||||
|
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||||
|
attributes.put("hiveql.args.2.value", "Mark");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.3.value", "84");
|
||||||
|
|
||||||
|
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||||
|
attributes.put("hiveql.args.4.value", "1");
|
||||||
|
|
||||||
|
runner.enqueue(sql.getBytes(), attributes);
|
||||||
|
try {
|
||||||
|
runner.run();
|
||||||
|
fail("Should throw ProcessException");
|
||||||
|
} catch (AssertionError e) {
|
||||||
|
assertTrue(e.getCause() instanceof ProcessException);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||||
|
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple implementation only for testing purposes
|
||||||
|
*/
|
||||||
|
private static class MockDBCPService extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||||
|
private final String dbLocation;
|
||||||
|
|
||||||
|
MockDBCPService(final String dbLocation) {
|
||||||
|
this.dbLocation = dbLocation;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getIdentifier() {
|
||||||
|
return "dbcp";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Connection getConnection() throws ProcessException {
|
||||||
|
try {
|
||||||
|
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
|
||||||
|
return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true");
|
||||||
|
} catch (final Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
throw new ProcessException("getConnection failed: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getConnectionURL() {
|
||||||
|
return "jdbc:derby:" + dbLocation + ";create=true";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple implementation only for testing purposes
|
||||||
|
*/
|
||||||
|
private static class SQLExceptionService extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||||
|
private final Hive_1_1DBCPService service;
|
||||||
|
private int allowedBeforeFailure = 0;
|
||||||
|
private int successful = 0;
|
||||||
|
private int errorCode = 30000; // Default to a retryable exception code
|
||||||
|
|
||||||
|
SQLExceptionService(final Hive_1_1DBCPService service) {
|
||||||
|
this.service = service;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getIdentifier() {
|
||||||
|
return "dbcp";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Connection getConnection() throws ProcessException {
|
||||||
|
try {
|
||||||
|
if (++successful > allowedBeforeFailure) {
|
||||||
|
final Connection conn = Mockito.mock(Connection.class);
|
||||||
|
Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode));
|
||||||
|
return conn;
|
||||||
|
} else {
|
||||||
|
return service.getConnection();
|
||||||
|
}
|
||||||
|
} catch (final Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
throw new ProcessException("getConnection failed: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getConnectionURL() {
|
||||||
|
return service != null ? service.getConnectionURL() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setErrorCode(int errorCode) {
|
||||||
|
this.errorCode = errorCode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,658 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hive;
|
||||||
|
|
||||||
|
import org.apache.avro.file.DataFileStream;
|
||||||
|
import org.apache.avro.generic.GenericDatumReader;
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
|
import org.apache.avro.io.DatumReader;
|
||||||
|
import org.apache.nifi.controller.AbstractControllerService;
|
||||||
|
import org.apache.nifi.dbcp.DBCPService;
|
||||||
|
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||||
|
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.provenance.ProvenanceEventRecord;
|
||||||
|
import org.apache.nifi.provenance.ProvenanceEventType;
|
||||||
|
import org.apache.nifi.reporting.InitializationException;
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
|
import org.apache.nifi.util.TestRunner;
|
||||||
|
import org.apache.nifi.util.TestRunners;
|
||||||
|
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.sql.Types;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import static org.apache.nifi.processors.hive.SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||||
|
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
public class TestSelectHive_1_1QL {
|
||||||
|
|
||||||
|
private static final Logger LOGGER;
private final static String MAX_ROWS_KEY = "maxRows";
private final int NUM_OF_ROWS = 100;

// The simpleLogger system properties are set before the logger is created below.
static {
    System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
    System.setProperty("org.slf4j.simpleLogger.showDateTime", "true");
    // NOTE(review): "nifi.io.nio" is not a fully qualified logger name either; left as-is
    // because the intended target is not visible from this file.
    System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug");
    // Per-logger levels are keyed by the full logger name, so the "org.apache." prefix of
    // this package is required for these two settings to take effect.
    System.setProperty("org.slf4j.simpleLogger.log.org.apache.nifi.processors.hive.SelectHive_1_1QL", "debug");
    System.setProperty("org.slf4j.simpleLogger.log.org.apache.nifi.processors.hive.TestSelectHive_1_1QL", "debug");
    LOGGER = LoggerFactory.getLogger(TestSelectHive_1_1QL.class);
}
|
||||||
|
|
||||||
|
private final static String DB_LOCATION = "target/db";
|
||||||
|
|
||||||
|
private final static String QUERY_WITH_EL = "select "
|
||||||
|
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||||
|
+ " from persons PER"
|
||||||
|
+ " where PER.ID > ${person.id}";
|
||||||
|
|
||||||
|
private final static String QUERY_WITHOUT_EL = "select "
|
||||||
|
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||||
|
+ " from persons PER"
|
||||||
|
+ " where PER.ID > 10";
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setupClass() {
|
||||||
|
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||||
|
}
|
||||||
|
|
||||||
|
private TestRunner runner;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws InitializationException {
|
||||||
|
final DBCPService dbcp = new DBCPServiceSimpleImpl();
|
||||||
|
final Map<String, String> dbcpProperties = new HashMap<>();
|
||||||
|
|
||||||
|
runner = TestRunners.newTestRunner(SelectHive_1_1QL.class);
|
||||||
|
runner.addControllerService("dbcp", dbcp, dbcpProperties);
|
||||||
|
runner.enableControllerService(dbcp);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIncomingConnectionWithNoFlowFile() throws InitializationException {
|
||||||
|
runner.setIncomingConnection(true);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons");
|
||||||
|
runner.run();
|
||||||
|
runner.assertTransferCount(SelectHive_1_1QL.REL_SUCCESS, 0);
|
||||||
|
runner.assertTransferCount(SelectHive_1_1QL.REL_FAILURE, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro");
|
||||||
|
|
||||||
|
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||||
|
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||||
|
assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType());
|
||||||
|
assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITH_EL, true, "Avro");
|
||||||
|
|
||||||
|
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||||
|
assertEquals(3, provenanceEvents.size());
|
||||||
|
|
||||||
|
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||||
|
assertEquals(ProvenanceEventType.FORK, provenance0.getEventType());
|
||||||
|
|
||||||
|
final ProvenanceEventRecord provenance1 = provenanceEvents.get(1);
|
||||||
|
assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType());
|
||||||
|
assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri());
|
||||||
|
|
||||||
|
final ProvenanceEventRecord provenance2 = provenanceEvents.get(2);
|
||||||
|
assertEquals(ProvenanceEventType.FORK, provenance2.getEventType());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithNullIntColumn() throws SQLException {
|
||||||
|
// remove previous test database, if any
|
||||||
|
final File dbLocation = new File(DB_LOCATION);
|
||||||
|
dbLocation.delete();
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_NULL_INT");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Nothing to do, probably means the table didn't exist
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
|
||||||
|
|
||||||
|
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
|
||||||
|
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");
|
||||||
|
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHive_1_1QL.RESULT_ROW_COUNT, "2");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithSqlException() throws SQLException {
|
||||||
|
// remove previous test database, if any
|
||||||
|
final File dbLocation = new File(DB_LOCATION);
|
||||||
|
dbLocation.delete();
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_NO_ROWS");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Nothing to do, probably means the table didn't exist
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_NO_ROWS (id integer)");
|
||||||
|
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
// Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS");
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerExceptionInPreQieriesNoIncomingFlows()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||||
|
"select 'no exception' from persons; select exception from persons",
|
||||||
|
null);
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerExceptionInPreQieriesWithIncomingFlows()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||||
|
"select 'no exception' from persons; select exception from persons",
|
||||||
|
null);
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerExceptionInPostQieriesNoIncomingFlows()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||||
|
null,
|
||||||
|
"select 'no exception' from persons; select exception from persons");
|
||||||
|
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerExceptionInPostQieriesWithIncomingFlows()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||||
|
null,
|
||||||
|
"select 'no exception' from persons; select exception from persons");
|
||||||
|
|
||||||
|
// with incoming connections, it should be rolled back
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithBadSQL() throws SQLException {
|
||||||
|
final String BAD_SQL = "create table TEST_NO_ROWS (id integer)";
|
||||||
|
|
||||||
|
// Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent)
|
||||||
|
runner.setIncomingConnection(true);
|
||||||
|
// Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
|
||||||
|
runner.enqueue(BAD_SQL);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0);
|
||||||
|
flowFile.assertContentEquals(BAD_SQL);
|
||||||
|
flowFile.assertAttributeEquals("parentIds", null);
|
||||||
|
runner.clearTransferState();
|
||||||
|
|
||||||
|
// Test with no incoming flow file (an empty flow file is transferred)
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
// Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, BAD_SQL);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||||
|
flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0);
|
||||||
|
flowFile.assertContentEquals("");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerWithCsv()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerWithAvro()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerWithValidPreQieries()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||||
|
"select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format.
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerWithValidPostQieries()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||||
|
null,
|
||||||
|
//should not be 'select'. But Derby driver doesn't support "set param=val" format,
|
||||||
|
//so just providing any "compilable" query.
|
||||||
|
" select '4' from persons; \nselect '5' from persons");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invokeOnTriggerWithValidPrePostQieries()
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||||
|
//should not be 'select'. But Derby driver doesn't support "set param=val" format,
|
||||||
|
//so just providing any "compilable" query.
|
||||||
|
"select '1' from persons; select '2' from persons",
|
||||||
|
" select '4' from persons; \nselect '5' from persons");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat)
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||||
|
String preQueries, String postQueries)
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries);
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
|
||||||
|
final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS);
|
||||||
|
MockFlowFile flowFile = flowfiles.get(0);
|
||||||
|
final InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
|
||||||
|
long recordsFromStream = 0;
|
||||||
|
if (AVRO.equals(outputFormat)) {
|
||||||
|
assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||||
|
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||||
|
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||||
|
GenericRecord record = null;
|
||||||
|
while (dataFileReader.hasNext()) {
|
||||||
|
// Reuse record object by passing it to next(). This saves us from
|
||||||
|
// allocating and garbage collecting many objects for files with
|
||||||
|
// many items.
|
||||||
|
record = dataFileReader.next(record);
|
||||||
|
recordsFromStream++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||||
|
|
||||||
|
String headerRow = br.readLine();
|
||||||
|
// Derby capitalizes column names
|
||||||
|
assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow);
|
||||||
|
|
||||||
|
// Validate rows
|
||||||
|
String line;
|
||||||
|
while ((line = br.readLine()) != null) {
|
||||||
|
recordsFromStream++;
|
||||||
|
String[] values = line.split(",");
|
||||||
|
if (recordsFromStream < (NUM_OF_ROWS - 10)) {
|
||||||
|
assertEquals(3, values.length);
|
||||||
|
assertTrue(values[1].startsWith("\""));
|
||||||
|
assertTrue(values[1].endsWith("\""));
|
||||||
|
} else {
|
||||||
|
assertEquals(2, values.length); // Middle value is null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals(NUM_OF_ROWS - 10, recordsFromStream);
|
||||||
|
assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHive_1_1QL.RESULT_ROW_COUNT)));
|
||||||
|
flowFile.assertAttributeEquals(AbstractHive_1_1QLProcessor.ATTR_INPUT_TABLES, "persons");
|
||||||
|
}
|
||||||
|
|
||||||
|
public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||||
|
String preQueries, String postQueries)
|
||||||
|
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||||
|
|
||||||
|
// remove previous test database, if any
|
||||||
|
final File dbLocation = new File(DB_LOCATION);
|
||||||
|
dbLocation.delete();
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
final Statement stmt = con.createStatement();
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table persons");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Nothing to do here, the table didn't exist
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table persons (id integer, name varchar(100), code integer)");
|
||||||
|
Random rng = new Random(53496);
|
||||||
|
stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")");
|
||||||
|
for (int i = 2; i < NUM_OF_ROWS; i++) {
|
||||||
|
stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")");
|
||||||
|
}
|
||||||
|
stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)");
|
||||||
|
|
||||||
|
LOGGER.info("test data loaded");
|
||||||
|
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, query);
|
||||||
|
runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat);
|
||||||
|
if (preQueries != null) {
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_PRE_QUERY, preQueries);
|
||||||
|
}
|
||||||
|
if (postQueries != null) {
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_POST_QUERY, postQueries);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (incomingFlowFile) {
|
||||||
|
// incoming FlowFile content is not used, but attributes are used
|
||||||
|
final Map<String, String> attributes = new HashMap<>();
|
||||||
|
attributes.put("person.id", "10");
|
||||||
|
runner.enqueue("Hello".getBytes(), attributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setIncomingConnection(incomingFlowFile);
|
||||||
|
runner.run();
|
||||||
|
|
||||||
|
return runner;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
InputStream in;
|
||||||
|
MockFlowFile mff;
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||||
|
int rowCount = 0;
|
||||||
|
//create larger row set
|
||||||
|
for (int batch = 0; batch < 100; batch++) {
|
||||||
|
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||||
|
rowCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
|
||||||
|
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||||
|
runner.setVariable(MAX_ROWS_KEY, "9");
|
||||||
|
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12);
|
||||||
|
|
||||||
|
//ensure all but the last file have 9 records each
|
||||||
|
for (int ff = 0; ff < 11; ff++) {
|
||||||
|
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff);
|
||||||
|
in = new ByteArrayInputStream(mff.toByteArray());
|
||||||
|
assertEquals(9, getNumberOfRecordsFromStream(in));
|
||||||
|
|
||||||
|
mff.assertAttributeExists("fragment.identifier");
|
||||||
|
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||||
|
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||||
|
}
|
||||||
|
|
||||||
|
//last file should have 1 record
|
||||||
|
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11);
|
||||||
|
in = new ByteArrayInputStream(mff.toByteArray());
|
||||||
|
assertEquals(1, getNumberOfRecordsFromStream(in));
|
||||||
|
mff.assertAttributeExists("fragment.identifier");
|
||||||
|
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||||
|
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||||
|
runner.clearTransferState();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||||
|
int rowCount = 0;
|
||||||
|
//create larger row set
|
||||||
|
for (int batch = 0; batch < 100; batch++) {
|
||||||
|
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||||
|
rowCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setIncomingConnection(true);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||||
|
runner.setVariable(MAX_ROWS_KEY, "9");
|
||||||
|
|
||||||
|
Map<String, String> attributes = new HashMap<String, String>();
|
||||||
|
attributes.put("hiveql.args.1.value", "1");
|
||||||
|
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes );
|
||||||
|
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||||
|
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0);
|
||||||
|
// Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s)
|
||||||
|
flowFile.assertAttributeEquals("hiveql.args.1.value", "1");
|
||||||
|
flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||||
|
runner.clearTransferState();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
InputStream in;
|
||||||
|
MockFlowFile mff;
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||||
|
int rowCount = 0;
|
||||||
|
//create larger row set
|
||||||
|
for (int batch = 0; batch < 100; batch++) {
|
||||||
|
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||||
|
rowCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setIncomingConnection(true);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV);
|
||||||
|
|
||||||
|
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap<String, String>() {{
|
||||||
|
put(MAX_ROWS_KEY, "9");
|
||||||
|
}});
|
||||||
|
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12);
|
||||||
|
|
||||||
|
//ensure all but the last file have 9 records (10 lines = 9 records + header) each
|
||||||
|
for (int ff = 0; ff < 11; ff++) {
|
||||||
|
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff);
|
||||||
|
in = new ByteArrayInputStream(mff.toByteArray());
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||||
|
assertEquals(10, br.lines().count());
|
||||||
|
|
||||||
|
mff.assertAttributeExists("fragment.identifier");
|
||||||
|
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||||
|
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||||
|
}
|
||||||
|
|
||||||
|
//last file should have 1 record (2 lines = 1 record + header)
|
||||||
|
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11);
|
||||||
|
in = new ByteArrayInputStream(mff.toByteArray());
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||||
|
assertEquals(2, br.lines().count());
|
||||||
|
mff.assertAttributeExists("fragment.identifier");
|
||||||
|
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||||
|
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||||
|
runner.clearTransferState();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||||
|
|
||||||
|
// load test data to database
|
||||||
|
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||||
|
Statement stmt = con.createStatement();
|
||||||
|
InputStream in;
|
||||||
|
MockFlowFile mff;
|
||||||
|
|
||||||
|
try {
|
||||||
|
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||||
|
} catch (final SQLException sqle) {
|
||||||
|
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||||
|
int rowCount = 0;
|
||||||
|
//create larger row set
|
||||||
|
for (int batch = 0; batch < 100; batch++) {
|
||||||
|
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||||
|
rowCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.setIncomingConnection(false);
|
||||||
|
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
|
||||||
|
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "9");
|
||||||
|
Integer maxFragments = 3;
|
||||||
|
runner.setProperty(SelectHive_1_1QL.MAX_FRAGMENTS, maxFragments.toString());
|
||||||
|
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, maxFragments);
|
||||||
|
|
||||||
|
for (int i = 0; i < maxFragments; i++) {
|
||||||
|
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(i);
|
||||||
|
in = new ByteArrayInputStream(mff.toByteArray());
|
||||||
|
assertEquals(9, getNumberOfRecordsFromStream(in));
|
||||||
|
|
||||||
|
mff.assertAttributeExists("fragment.identifier");
|
||||||
|
assertEquals(Integer.toString(i), mff.getAttribute("fragment.index"));
|
||||||
|
assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count"));
|
||||||
|
}
|
||||||
|
|
||||||
|
runner.clearTransferState();
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
|
||||||
|
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||||
|
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||||
|
GenericRecord record = null;
|
||||||
|
long recordsFromStream = 0;
|
||||||
|
while (dataFileReader.hasNext()) {
|
||||||
|
// Reuse record object by passing it to next(). This saves us from
|
||||||
|
// allocating and garbage collecting many objects for files with
|
||||||
|
// many items.
|
||||||
|
record = dataFileReader.next(record);
|
||||||
|
recordsFromStream += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return recordsFromStream;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple implementation only for SelectHive_1_1QL processor testing.
|
||||||
|
*/
|
||||||
|
private class DBCPServiceSimpleImpl extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getIdentifier() {
|
||||||
|
return "dbcp";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Connection getConnection() throws ProcessException {
|
||||||
|
try {
|
||||||
|
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
|
||||||
|
return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true");
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new ProcessException("getConnection failed: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getConnectionURL() {
|
||||||
|
return "jdbc:derby:" + DB_LOCATION + ";create=true";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
"namespace" : "org.apache.nifi",
|
||||||
|
"name" : "outer_record",
|
||||||
|
"type" : "record",
|
||||||
|
"fields" : [ {
|
||||||
|
"name" : "records",
|
||||||
|
"type" : {
|
||||||
|
"type" : "array",
|
||||||
|
"items" : {
|
||||||
|
"type" : "record",
|
||||||
|
"name" : "inner_record",
|
||||||
|
"fields" : [ {
|
||||||
|
"name" : "name",
|
||||||
|
"type" : "string"
|
||||||
|
}, {
|
||||||
|
"name" : "age",
|
||||||
|
"type" : "int"
|
||||||
|
} ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} ]
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<configuration>
    <!-- Default filesystem URI. NOTE(review): Hadoop 2+ lists fs.default.name as the
         deprecated spelling of fs.defaultFS; the original key is kept unchanged here. -->
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
    <!-- Kerberos is selected as the Hadoop authentication mechanism. -->
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
    <!-- Hadoop authorization is switched on. -->
    <property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
    </property>
</configuration>
|
|
@ -0,0 +1,22 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<configuration>
    <!-- Default filesystem URI. NOTE(review): Hadoop 2+ lists fs.default.name as the
         deprecated spelling of fs.defaultFS; the original key is kept unchanged here. -->
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
</configuration>
|
|
@ -0,0 +1,30 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>fs.default.name</name>
|
||||||
|
<value>hdfs://hive</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hive.server2.authentication</name>
|
||||||
|
<value>KERBEROS</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hadoop.security.authentication</name>
|
||||||
|
<value>kerberos</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,22 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>fs.default.name</name>
|
||||||
|
<value>file:///</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,10 @@
|
||||||
|
[libdefaults]
|
||||||
|
default_realm = EXAMPLE.COM
|
||||||
|
dns_lookup_kdc = false
|
||||||
|
dns_lookup_realm = false
|
||||||
|
|
||||||
|
[realms]
|
||||||
|
EXAMPLE.COM = {
|
||||||
|
kdc = kerberos.example.com
|
||||||
|
admin_server = kerberos.example.com
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
{"namespace": "example.avro",
|
||||||
|
"type": "record",
|
||||||
|
"name": "User",
|
||||||
|
"fields": [
|
||||||
|
{"name": "name", "type": "string"},
|
||||||
|
{"name": "favorite_number", "type": ["int", "null"]},
|
||||||
|
{"name": "favorite_color", "type": ["string", "null"]},
|
||||||
|
{"name": "scale", "type": ["double", "null"]}
|
||||||
|
]
|
||||||
|
}
|
|
@ -31,6 +31,8 @@
|
||||||
<module>nifi-hive-services-api-nar</module>
|
<module>nifi-hive-services-api-nar</module>
|
||||||
<module>nifi-hive-processors</module>
|
<module>nifi-hive-processors</module>
|
||||||
<module>nifi-hive-nar</module>
|
<module>nifi-hive-nar</module>
|
||||||
|
<module>nifi-hive_1_1-processors</module>
|
||||||
|
<module>nifi-hive_1_1-nar</module>
|
||||||
<module>nifi-hive3-processors</module>
|
<module>nifi-hive3-processors</module>
|
||||||
<module>nifi-hive3-nar</module>
|
<module>nifi-hive3-nar</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
@ -47,6 +49,8 @@
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
<hive11.version>1.1.1</hive11.version>
|
||||||
|
<hive11.hadoop.version>2.6.2</hive11.hadoop.version>
|
||||||
<hive.version>1.2.1</hive.version>
|
<hive.version>1.2.1</hive.version>
|
||||||
<hive.hadoop.version>2.6.2</hive.hadoop.version>
|
<hive.hadoop.version>2.6.2</hive.hadoop.version>
|
||||||
<hive3.version>3.1.0</hive3.version>
|
<hive3.version>3.1.0</hive3.version>
|
||||||
|
|
Loading…
Reference in New Issue