NIFI-13935 Removed HBase 2 Processors and Services

This closes #9453

Signed-off-by: Joseph Witt <joewitt@apache.org>
Authored by exceptionfactory on 2024-10-26 17:42:16 -05:00, committed by Joseph Witt
parent eb8d4ee06f
commit 63b0456bad
91 changed files with 0 additions and 13455 deletions


@@ -938,29 +938,6 @@ language governing permissions and limitations under the License. -->
</dependency>
</dependencies>
</profile>
<profile>
<id>include-hbase</id>
<activation>
<activeByDefault>false</activeByDefault>
<property>
<name>allProfiles</name>
</property>
</activation>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase_2-client-service-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
</dependencies>
</profile>
<profile>
<id>include-iotdb</id>
<activation>


@@ -990,11 +990,6 @@
<artifactId>nifi-hazelcast-services-api</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-processors</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hl7-processors</artifactId>
@@ -1379,16 +1374,6 @@
<artifactId>nifi-hadoop-dbcp-service</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-client-service-api</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase_2-client-service</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-http-context-map-api</artifactId>


@@ -59,11 +59,6 @@
<packageUrl regex="true">^pkg:maven/org\.apache\.hive/hive\-storage\-api@.*$</packageUrl>
<cve>CVE-2021-34538</cve>
</suppress>
<suppress>
<notes>Hadoop vulnerabilities do not apply to HBase Hadoop2 compatibility library</notes>
<packageUrl regex="true">^pkg:maven/org\.apache\.hbase/hbase\-hadoop2\-compat@.*$</packageUrl>
<cpe>cpe:/a:apache:hadoop</cpe>
</suppress>
<suppress>
<notes>The Jackson maintainers dispute the applicability of CVE-2023-35116 based on cyclic nature of reported concern</notes>
<packageUrl regex="true">^pkg:maven/com\.fasterxml\.jackson\.core/jackson\-databind@.*$</packageUrl>


@@ -2699,7 +2699,6 @@ deprecationLogger.warn(
|==================================================================================================================================================
| Package | Maven Profile | Description
| Apache Hadoop Bundle | include-hadoop | Adds support for Apache Hadoop with HDFS and Parquet components
| Apache HBase Bundle | include-hbase | Adds support for Apache HBase
| Apache IoTDB Bundle | include-iotdb | Adds support for Apache IoTDB
| ASN.1 Support | include-asn1 | Adds support for ASN.1
| Contribution Check | contrib-check | Runs various quality checks that a contribution must pass before it can be accepted into the core NiFi code base.


@@ -1,38 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-hbase-nar</artifactId>
<packaging>nar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-shared-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-processors</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
</dependencies>
</project>


@@ -1,231 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
APACHE NIFI SUBCOMPONENTS:
The Apache NiFi project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for these
subcomponents is subject to the terms and conditions of the following
licenses.
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
under an MIT style license.
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -1,42 +0,0 @@
nifi-hbase-nar
Copyright 2014-2024 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
===========================================
Apache Software License v2
===========================================
The following binary components are provided under the Apache Software License v2
(ASLv2) Apache Commons Lang
The following NOTICE information applies:
Apache Commons Lang
Copyright 2001-2015 The Apache Software Foundation
This product includes software from the Spring Framework,
under the Apache License 2.0 (see: StringUtils.containsWhitespace())
(ASLv2) Jackson JSON processor
The following NOTICE information applies:
# Jackson JSON processor
Jackson is a high-performance, Free/Open Source JSON processing library.
It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
been in development since 2007.
It is currently developed by a community of developers, as well as supported
commercially by FasterXML.com.
## Licensing
Jackson core and extension components may be licensed under different licenses.
To find the details that apply to this artifact see the accompanying LICENSE file.
For more information, including possible other licensing options, contact
FasterXML.com (http://fasterxml.com).
## Credits
A list of contributors may be found from CREDITS file, which is included
in some artifacts (usually source distributions); but is always available
from the source code management (SCM) system project uses.


@@ -1,78 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-hbase-processors</artifactId>
<description>Support for interacting with HBase</description>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-client-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-security-kerberos-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-distributed-cache-client-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-utils</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record-serialization-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock-record-utils</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record-path</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
</dependency>
</dependencies>
</project>


@@ -1,102 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public abstract class AbstractDeleteHBase extends AbstractProcessor {
protected static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
protected static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("Table Name")
.description("The name of the HBase Table.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor ROW_ID = new PropertyDescriptor.Builder()
.name("Row Identifier")
.description("Specifies the Row ID to use when deleting data into HBase")
.required(false) // not all sub-classes will require this
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("A FlowFile is routed to this relationship after it has been successfully stored in HBase")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("A FlowFile is routed to this relationship if it cannot be sent to HBase")
.build();
protected HBaseClientService clientService;
@OnScheduled
public void onScheduled(final ProcessContext context) {
clientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
}
@Override
public Set<Relationship> getRelationships() {
Set<Relationship> set = new HashSet<>();
set.add(REL_SUCCESS);
set.add(REL_FAILURE);
return set;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(ROW_ID);
return properties;
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
try {
doDelete(context, session);
} catch (Exception e) {
getLogger().error("Failed to perform delete", e);
}
}
protected abstract void doDelete(ProcessContext context, ProcessSession session) throws Exception;
}
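
For context on the removed template method above: onTrigger() simply delegates to doDelete(), so a concrete subclass only had to resolve its properties and call the configured HBaseClientService. A minimal, hypothetical sketch of such a subclass (the class name and single-row handling are illustrative and not part of the removed code; it reuses the clientService.delete(table, rowId, visibilityLabel) and toTransitUri(table, rowId) calls that appear later in this commit):

package org.apache.nifi.hbase;

import java.nio.charset.StandardCharsets;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;

public class DeleteSingleHBaseRow extends AbstractDeleteHBase {
    @Override
    protected void doDelete(final ProcessContext context, final ProcessSession session) throws Exception {
        final FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        // Resolve the table and row id against the incoming FlowFile attributes
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String rowId = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();

        // Issue the delete through the configured HBase Client Service; a null visibility label means none
        clientService.delete(tableName, rowId.getBytes(StandardCharsets.UTF_8), null);

        session.getProvenanceReporter().invokeRemoteProcess(flowFile, clientService.toTransitUri(tableName, rowId));
        session.transfer(flowFile, REL_SUCCESS);
    }
}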


@@ -1,276 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.DynamicProperties;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.security.krb.KerberosLoginException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Base class for processors that put data to HBase.
*/
@DynamicProperties({
@DynamicProperty(name = "visibility.<COLUMN FAMILY>", description = "Visibility label for everything under that column family " +
"when a specific label for a particular column qualifier is not available.", expressionLanguageScope = ExpressionLanguageScope.FLOWFILE_ATTRIBUTES,
value = "visibility label for <COLUMN FAMILY>"
),
@DynamicProperty(name = "visibility.<COLUMN FAMILY>.<COLUMN QUALIFIER>", description = "Visibility label for the specified column qualifier " +
"qualified by a configured column family.", expressionLanguageScope = ExpressionLanguageScope.FLOWFILE_ATTRIBUTES,
value = "visibility label for <COLUMN FAMILY>:<COLUMN QUALIFIER>."
)
})
public abstract class AbstractPutHBase extends AbstractProcessor {
protected static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
protected static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("Table Name")
.description("The name of the HBase Table to put data into")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor ROW_ID = new PropertyDescriptor.Builder()
.name("Row Identifier")
.description("Specifies the Row ID to use when inserting data into HBase")
.required(false) // not all sub-classes will require this
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final String STRING_ENCODING_VALUE = "String";
static final String BYTES_ENCODING_VALUE = "Bytes";
static final String BINARY_ENCODING_VALUE = "Binary";
protected static final AllowableValue ROW_ID_ENCODING_STRING = new AllowableValue(STRING_ENCODING_VALUE, STRING_ENCODING_VALUE,
"Stores the value of row id as a UTF-8 String.");
protected static final AllowableValue ROW_ID_ENCODING_BINARY = new AllowableValue(BINARY_ENCODING_VALUE, BINARY_ENCODING_VALUE,
"Stores the value of the rows id as a binary byte array. It expects that the row id is a binary formatted string.");
static final PropertyDescriptor ROW_ID_ENCODING_STRATEGY = new PropertyDescriptor.Builder()
.name("Row Identifier Encoding Strategy")
.description("Specifies the data type of Row ID used when inserting data into HBase. The default behavior is" +
" to convert the row id to a UTF-8 byte array. Choosing Binary will convert a binary formatted string" +
" to the correct byte[] representation. The Binary option should be used if you are using Binary row" +
" keys in HBase")
.required(false) // not all sub-classes will require this
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.defaultValue(ROW_ID_ENCODING_STRING.getValue())
.allowableValues(ROW_ID_ENCODING_STRING, ROW_ID_ENCODING_BINARY)
.build();
protected static final PropertyDescriptor COLUMN_FAMILY = new PropertyDescriptor.Builder()
.name("Column Family")
.description("The Column Family to use when inserting data into HBase")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor COLUMN_QUALIFIER = new PropertyDescriptor.Builder()
.name("Column Qualifier")
.description("The Column Qualifier to use when inserting data into HBase")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor TIMESTAMP = new PropertyDescriptor.Builder()
.name("timestamp")
.displayName("Timestamp")
.description("The timestamp for the cells being created in HBase. This field can be left blank and HBase will use the current time.")
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.POSITIVE_LONG_VALIDATOR)
.build();
protected static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
.name("Batch Size")
.description("The maximum number of FlowFiles to process in a single execution. The FlowFiles will be " +
"grouped by table, and a single Put per table will be performed.")
.required(true)
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.defaultValue("25")
.build();
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("A FlowFile is routed to this relationship after it has been successfully stored in HBase")
.build();
public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("A FlowFile is routed to this relationship if it cannot be sent to HBase")
.build();
protected HBaseClientService clientService;
@OnScheduled
public void onScheduled(final ProcessContext context) {
clientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
if (propertyDescriptorName.startsWith("visibility.")) {
String[] parts = propertyDescriptorName.split("\\.");
String displayName;
String description;
if (parts.length == 2) {
displayName = String.format("Column Family %s Default Visibility", parts[1]);
description = String.format("Default visibility setting for %s", parts[1]);
} else if (parts.length == 3) {
displayName = String.format("Column Qualifier %s.%s Default Visibility", parts[1], parts[2]);
description = String.format("Default visibility setting for %s.%s", parts[1], parts[2]);
} else {
return null;
}
return new PropertyDescriptor.Builder()
.name(propertyDescriptorName)
.displayName(displayName)
.description(description)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.dynamic(true)
.build();
}
return null;
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
List<FlowFile> flowFiles = session.get(batchSize);
if (flowFiles == null || flowFiles.size() == 0) {
return;
}
final Map<String, List<PutFlowFile>> tablePuts = new HashMap<>();
// Group FlowFiles by HBase Table
for (final FlowFile flowFile : flowFiles) {
final PutFlowFile putFlowFile = createPut(session, context, flowFile);
if (putFlowFile == null) {
// sub-classes should log appropriate error messages before returning null
session.transfer(flowFile, REL_FAILURE);
} else if (!putFlowFile.isValid()) {
if (StringUtils.isBlank(putFlowFile.getTableName())) {
getLogger().error("Missing table name for FlowFile {}; routing to failure", flowFile);
} else if (null == putFlowFile.getRow()) {
getLogger().error("Missing row id for FlowFile {}; routing to failure", flowFile);
} else if (putFlowFile.getColumns() == null || putFlowFile.getColumns().isEmpty()) {
getLogger().error("No columns provided for FlowFile {}; routing to failure", flowFile);
} else {
// really shouldn't get here, but just in case
getLogger().error("Failed to produce a put for FlowFile {}; routing to failure", flowFile);
}
session.transfer(flowFile, REL_FAILURE);
} else {
List<PutFlowFile> putFlowFiles = tablePuts.get(putFlowFile.getTableName());
if (putFlowFiles == null) {
putFlowFiles = new ArrayList<>();
tablePuts.put(putFlowFile.getTableName(), putFlowFiles);
}
putFlowFiles.add(putFlowFile);
}
}
getLogger().debug("Sending {} FlowFiles to HBase in {} put operations", flowFiles.size(), tablePuts.size());
final long start = System.nanoTime();
final List<PutFlowFile> successes = new ArrayList<>();
for (Map.Entry<String, List<PutFlowFile>> entry : tablePuts.entrySet()) {
try {
clientService.put(entry.getKey(), entry.getValue());
successes.addAll(entry.getValue());
} catch (final KerberosLoginException kle) {
getLogger().error("Failed to connect to HBase due to {}: Rolling back session, and penalizing flow files", kle, kle);
session.rollback(true);
} catch (final Exception e) {
getLogger().error(e.getMessage(), e);
for (PutFlowFile putFlowFile : entry.getValue()) {
getLogger().error("Failed to send {} to HBase ", putFlowFile.getFlowFile(), e);
final FlowFile failure = session.penalize(putFlowFile.getFlowFile());
session.transfer(failure, REL_FAILURE);
}
}
}
final long sendMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
getLogger().debug("Sent {} FlowFiles to HBase successfully in {} milliseconds", successes.size(), sendMillis);
for (PutFlowFile putFlowFile : successes) {
session.transfer(putFlowFile.getFlowFile(), REL_SUCCESS);
final String details = "Put " + putFlowFile.getColumns().size() + " cells to HBase";
session.getProvenanceReporter().send(putFlowFile.getFlowFile(), getTransitUri(putFlowFile), details, sendMillis);
}
}
protected String getTransitUri(PutFlowFile putFlowFile) {
return clientService.toTransitUri(putFlowFile.getTableName(), new String(putFlowFile.getRow(), StandardCharsets.UTF_8));
}
protected byte[] getRow(final String row, final String encoding) {
//check to see if we need to modify the rowKey before we pass it down to the PutFlowFile
byte[] rowKeyBytes = null;
if (BINARY_ENCODING_VALUE.contentEquals(encoding)) {
rowKeyBytes = clientService.toBytesBinary(row);
} else {
rowKeyBytes = row.getBytes(StandardCharsets.UTF_8);
}
return rowKeyBytes;
}
/**
* Sub-classes provide the implementation to create a put from a FlowFile.
*
* @param session
* the current session
* @param context
* the current context
* @param flowFile
* the FlowFile to create a Put from
*
* @return a PutFlowFile instance for the given FlowFile
*/
protected abstract PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile);
}
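
The abstract createPut() hook documented above was the only piece a concrete put processor had to supply; batching, routing, and provenance reporting all lived in this base class. A minimal, hypothetical sketch of a single-cell implementation follows (the PutColumn and PutFlowFile constructor signatures are assumed from the nifi-hbase-client-service-api module and are not shown in this diff; a real processor would also override getSupportedPropertyDescriptors() to expose the properties it uses):

package org.apache.nifi.hbase;

import java.nio.charset.StandardCharsets;
import java.util.Collections;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.StreamUtils;

public class PutSingleHBaseCell extends AbstractPutHBase {
    @Override
    protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String row = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
        final String family = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
        final String qualifier = context.getProperty(COLUMN_QUALIFIER).evaluateAttributeExpressions(flowFile).getValue();

        // Use the FlowFile content as the cell value
        final byte[] content = new byte[(int) flowFile.getSize()];
        session.read(flowFile, in -> StreamUtils.fillBuffer(in, content));

        // getRow() applies the configured Row Identifier Encoding Strategy (String vs Binary)
        final byte[] rowKey = getRow(row, context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue());

        // Assumed constructors: PutColumn(byte[] family, byte[] qualifier, byte[] value) and
        // PutFlowFile(String table, byte[] row, Collection<PutColumn> columns, FlowFile flowFile)
        final PutColumn column = new PutColumn(
                family.getBytes(StandardCharsets.UTF_8),
                qualifier.getBytes(StandardCharsets.UTF_8),
                content);
        return new PutFlowFile(tableName, rowKey, Collections.singletonList(column), flowFile);
    }
}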


@@ -1,147 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@WritesAttributes({
@WritesAttribute(attribute = "error.line", description = "The line number of the error."),
@WritesAttribute(attribute = "error.msg", description = "The message explaining the error.")
})
@Tags({"hbase", "delete", "cell", "cells", "visibility"})
@CapabilityDescription("This processor allows the user to delete individual HBase cells by specifying one or more lines " +
"in the flowfile content that are a sequence composed of row ID, column family, column qualifier and associated visibility labels " +
"if visibility labels are enabled and in use. A user-defined separator is used to separate each of these pieces of data on each " +
"line, with :::: being the default separator.")
public class DeleteHBaseCells extends AbstractDeleteHBase {
static final PropertyDescriptor SEPARATOR = new PropertyDescriptor.Builder()
.name("delete-hbase-cell-separator")
.displayName("Separator")
.description("Each line of the flowfile content is separated into components for building a delete using this" +
"separator. It should be something other than a single colon or a comma because these are values that " +
"are associated with columns and visibility labels respectively. To delete a row with ID xyz, column family abc, " +
"column qualifier def and visibility label PII&PHI, one would specify xyz::::abc::::def::::PII&PHI given the default " +
"value")
.required(true)
.defaultValue("::::")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.build();
static final String ERROR_LINE = "error.line";
static final String ERROR_MSG = "error.msg";
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(SEPARATOR);
return properties;
}
private FlowFile writeErrorAttributes(int line, String msg, FlowFile file, ProcessSession session) {
file = session.putAttribute(file, ERROR_LINE, String.valueOf(line));
file = session.putAttribute(file, ERROR_MSG, msg != null ? msg : "");
return file;
}
private void logCell(String rowId, String family, String column, String visibility) {
StringBuilder sb = new StringBuilder()
.append("Assembling cell delete for...\t")
.append(String.format("Row ID: %s\t", rowId))
.append(String.format("Column Family: %s\t", family))
.append(String.format("Column Qualifier: %s\t", column))
.append(String.format("Visibility Label: %s", visibility));
getLogger().debug("{}", sb);
}
@Override
protected void doDelete(ProcessContext context, ProcessSession session) throws Exception {
FlowFile input = session.get();
if (input == null) {
return;
}
final String separator = context.getProperty(SEPARATOR).evaluateAttributeExpressions(input).getValue();
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(input).getValue();
List<String> rowKeys = new ArrayList<>();
int lineNum = 1;
try (InputStream is = session.read(input)) {
Scanner scanner = new Scanner(is);
List<DeleteRequest> deletes = new ArrayList<>();
while (scanner.hasNextLine()) {
String line = scanner.nextLine().trim();
if (line.equals("")) {
continue;
}
String[] parts = line.split(separator);
if (parts.length < 3 || parts.length > 4) {
final String msg = String.format("Invalid line length. It must have 3 or 4 components. It had %d.", parts.length);
is.close();
input = writeErrorAttributes(lineNum, msg, input, session);
session.transfer(input, REL_FAILURE);
getLogger().error(msg);
return;
}
String rowId = parts[0];
String family = parts[1];
String column = parts[2];
String visibility = parts.length == 4 ? parts[3] : null;
DeleteRequest request = new DeleteRequest(rowId.getBytes(), family.getBytes(), column.getBytes(), visibility);
deletes.add(request);
if (!rowKeys.contains(rowId)) {
rowKeys.add(rowId);
}
if (getLogger().isDebugEnabled()) {
logCell(rowId, family, column, visibility);
}
lineNum++;
}
is.close();
clientService.deleteCells(tableName, deletes);
for (int index = 0; index < rowKeys.size(); index++) { //Could be many row keys in one flowfile.
session.getProvenanceReporter().invokeRemoteProcess(input, clientService.toTransitUri(tableName, rowKeys.get(index)));
}
session.transfer(input, REL_SUCCESS);
} catch (Exception ex) {
input = writeErrorAttributes(lineNum, ex.getMessage(), input, session);
session.transfer(input, REL_FAILURE);
}
}
}
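
To make the line format described above concrete: the example from the Separator property, xyz::::abc::::def::::PII&PHI, is split on the separator and becomes a single delete request, roughly as follows (a hypothetical fragment that reuses the DeleteRequest constructor call from doDelete() above):

// One input line from the FlowFile content, using the default :::: separator
String[] parts = "xyz::::abc::::def::::PII&PHI".split("::::");
DeleteRequest request = new DeleteRequest(
        parts[0].getBytes(),   // row id: xyz
        parts[1].getBytes(),   // column family: abc
        parts[2].getBytes(),   // column qualifier: def
        parts[3]);             // visibility label: PII&PHI
// Batches of such requests are then submitted via clientService.deleteCells(tableName, deletes)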


@@ -1,230 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.List;
@WritesAttributes(
value = {
@WritesAttribute( attribute = "restart.index", description = "If a delete batch fails, 'restart.index' attribute is added to the FlowFile and sent to 'failure' " +
"relationship, so that this processor can retry from there when the same FlowFile is routed again." ),
@WritesAttribute( attribute = "rowkey.start", description = "The first rowkey in the flowfile. Only written when using the flowfile's content for the row IDs."),
@WritesAttribute( attribute = "rowkey.end", description = "The last rowkey in the flowfile. Only written when using the flowfile's content for the row IDs.")
}
)
@Tags({ "delete", "hbase" })
@CapabilityDescription(
"Delete HBase records individually or in batches. The input can be a single row ID in the flowfile content, one ID per line, " +
"row IDs separated by a configurable separator character (default is a comma). ")
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
public class DeleteHBaseRow extends AbstractDeleteHBase {
static final AllowableValue ROW_ID_CONTENT = new AllowableValue("content", "FlowFile content", "Get the row key(s) from the flowfile content.");
static final AllowableValue ROW_ID_ATTR = new AllowableValue("attr", "FlowFile attributes", "Get the row key from an expression language statement.");
static final String RESTART_INDEX = "restart.index";
static final String ROWKEY_START = "rowkey.start";
static final String ROWKEY_END = "rowkey.end";
static final PropertyDescriptor ROW_ID_LOCATION = new PropertyDescriptor.Builder()
.name("delete-hb-row-id-location")
.displayName("Row ID Location")
.description("The location of the row ID to use for building the delete. Can be from the content or an expression language statement.")
.required(true)
.defaultValue(ROW_ID_CONTENT.getValue())
.allowableValues(ROW_ID_CONTENT, ROW_ID_ATTR)
.addValidator(Validator.VALID)
.build();
static final PropertyDescriptor FLOWFILE_FETCH_COUNT = new PropertyDescriptor.Builder()
.name("delete-hb-flowfile-fetch-count")
.displayName("Flowfile Fetch Count")
.description("The number of flowfiles to fetch per run.")
.required(true)
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.defaultValue("5")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.build();
static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
.name("delete-hb-batch-size")
.displayName("Batch Size")
.description("The number of deletes to send per batch.")
.required(true)
.defaultValue("50")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.build();
static final PropertyDescriptor KEY_SEPARATOR = new PropertyDescriptor.Builder()
.name("delete-hb-separator")
.displayName("Delete Row Key Separator")
.description("The separator character(s) that separate multiple row keys " +
"when multiple row keys are provided in the flowfile content")
.required(true)
.defaultValue(",")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
.name("delete-char-set")
.displayName("Character Set")
.description("The character set used to encode the row key for HBase.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final PropertyDescriptor VISIBLITY_LABEL = new PropertyDescriptor.Builder()
.name("delete-visibility-label")
.displayName("Visibility Label")
.description("If visibility labels are enabled, a row cannot be deleted without supplying its visibility label(s) in the delete " +
"request. Note: this visibility label will be applied to all cells within the row that is specified. If some cells have " +
"different visibility labels, they will not be deleted. When that happens, the failure to delete will be considered a success " +
"because HBase does not report it as a failure.")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.build();
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = super.getSupportedPropertyDescriptors();
properties.add(ROW_ID_LOCATION);
properties.add(FLOWFILE_FETCH_COUNT);
properties.add(BATCH_SIZE);
properties.add(KEY_SEPARATOR);
properties.add(VISIBLITY_LABEL);
properties.add(CHARSET);
return properties;
}
@Override
protected void doDelete(ProcessContext context, ProcessSession session) throws Exception {
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
final String location = context.getProperty(ROW_ID_LOCATION).getValue();
final int flowFileCount = context.getProperty(FLOWFILE_FETCH_COUNT).asInteger();
final String charset = context.getProperty(CHARSET).getValue();
List<FlowFile> flowFiles = session.get(flowFileCount);
if (flowFiles != null && flowFiles.size() > 0) {
for (int index = 0; index < flowFiles.size(); index++) {
FlowFile flowFile = flowFiles.get(index);
final String visibility = context.getProperty(VISIBLITY_LABEL).isSet()
? context.getProperty(VISIBLITY_LABEL).evaluateAttributeExpressions(flowFile).getValue() : null;
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
try {
if (location.equals(ROW_ID_CONTENT.getValue())) {
flowFile = doDeleteFromContent(flowFile, context, session, tableName, batchSize, charset, visibility);
if (flowFile.getAttribute(RESTART_INDEX) != null) {
session.transfer(flowFile, REL_FAILURE);
} else {
final String transitUrl = clientService.toTransitUri(tableName, flowFile.getAttribute(ROWKEY_END));
session.transfer(flowFile, REL_SUCCESS);
session.getProvenanceReporter().invokeRemoteProcess(flowFile, transitUrl);
}
} else {
String transitUrl = doDeleteFromAttribute(flowFile, context, tableName, charset, visibility);
session.transfer(flowFile, REL_SUCCESS);
session.getProvenanceReporter().invokeRemoteProcess(flowFile, transitUrl);
}
} catch (Exception ex) {
getLogger().error(ex.getMessage(), ex);
session.transfer(flowFile, REL_FAILURE);
}
}
}
}
private String doDeleteFromAttribute(FlowFile flowFile, ProcessContext context, String tableName, String charset, String visibility) throws Exception {
String rowKey = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
clientService.delete(tableName, rowKey.getBytes(charset), visibility);
return clientService.toTransitUri(tableName, rowKey);
}
private FlowFile doDeleteFromContent(FlowFile flowFile, ProcessContext context, ProcessSession session, String tableName, int batchSize, String charset, String visibility) throws Exception {
String keySeparator = context.getProperty(KEY_SEPARATOR).evaluateAttributeExpressions(flowFile).getValue();
final String restartIndex = flowFile.getAttribute(RESTART_INDEX);
ByteArrayOutputStream out = new ByteArrayOutputStream();
session.exportTo(flowFile, out);
out.close();
String data = new String(out.toByteArray(), charset);
int restartFrom = -1;
if (restartIndex != null) {
restartFrom = Integer.parseInt(restartIndex);
}
String first = null, last = null;
List<byte[]> batch = new ArrayList<>();
if (data != null && data.length() > 0) {
String[] parts = data.split(keySeparator);
int index = 0;
try {
for (index = 0; index < parts.length; index++) {
if (restartFrom > 0 && index < restartFrom) {
continue;
}
if (first == null) {
first = parts[index];
}
batch.add(parts[index].getBytes(charset));
if (batch.size() == batchSize) {
clientService.delete(tableName, batch, visibility);
batch = new ArrayList<>();
}
last = parts[index];
}
if (batch.size() > 0) {
clientService.delete(tableName, batch, visibility);
}
flowFile = session.removeAttribute(flowFile, RESTART_INDEX);
flowFile = session.putAttribute(flowFile, ROWKEY_START, first);
flowFile = session.putAttribute(flowFile, ROWKEY_END, last);
} catch (Exception ex) {
getLogger().error("Error sending delete batch", ex);
int restartPoint = index - batch.size() > 0 ? index - batch.size() : 0;
flowFile = session.putAttribute(flowFile, RESTART_INDEX, String.valueOf(restartPoint));
}
}
return flowFile;
}
}


@@ -1,412 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.hbase.io.JsonFullRowSerializer;
import org.apache.nifi.hbase.io.JsonQualifierAndValueRowSerializer;
import org.apache.nifi.hbase.io.RowSerializer;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hbase", "scan", "fetch", "get", "enrich"})
@CapabilityDescription("Fetches a row from an HBase table. The Destination property controls whether the cells are added as flow file attributes, " +
"or the row is written to the flow file content as JSON. This processor may be used to fetch a fixed row on a interval by specifying the " +
"table and row id directly in the processor, or it may be used to dynamically fetch rows by referencing the table and row id from " +
"incoming flow files.")
@WritesAttributes({
@WritesAttribute(attribute = "hbase.table", description = "The name of the HBase table that the row was fetched from"),
@WritesAttribute(attribute = "hbase.row", description = "A JSON document representing the row. This property is only written when a Destination of flowfile-attributes is selected."),
@WritesAttribute(attribute = "mime.type", description = "Set to application/json when using a Destination of flowfile-content, not set or modified otherwise")
})
public class FetchHBaseRow extends AbstractProcessor implements VisibilityFetchSupport {
static final Pattern COLUMNS_PATTERN = Pattern.compile("\\w+(:\\w+)?(?:,\\w+(:\\w+)?)*");
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("Table Name")
.description("The name of the HBase Table to fetch from.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor ROW_ID = new PropertyDescriptor.Builder()
.name("Row Identifier")
.description("The identifier of the row to fetch.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor COLUMNS = new PropertyDescriptor.Builder()
.name("Columns")
.description("An optional comma-separated list of \"<colFamily>:<colQualifier>\" pairs to fetch. To return all columns " +
"for a given family, leave off the qualifier such as \"<colFamily1>,<colFamily2>\".")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.createRegexMatchingValidator(COLUMNS_PATTERN))
.build();
static final AllowableValue DESTINATION_ATTRIBUTES = new AllowableValue("flowfile-attributes", "flowfile-attributes",
"Adds the JSON document representing the row that was fetched as an attribute named hbase.row. " +
"The format of the JSON document is determined by the JSON Format property. " +
"NOTE: Fetching many large rows into attributes may have a negative impact on performance.");
static final AllowableValue DESTINATION_CONTENT = new AllowableValue("flowfile-content", "flowfile-content",
"Overwrites the FlowFile content with a JSON document representing the row that was fetched. " +
"The format of the JSON document is determined by the JSON Format property.");
static final PropertyDescriptor DESTINATION = new PropertyDescriptor.Builder()
.name("Destination")
.description("Indicates whether the row fetched from HBase is written to FlowFile content or FlowFile Attributes.")
.required(true)
.allowableValues(DESTINATION_ATTRIBUTES, DESTINATION_CONTENT)
.defaultValue(DESTINATION_ATTRIBUTES.getValue())
.build();
static final AllowableValue JSON_FORMAT_FULL_ROW = new AllowableValue("full-row", "full-row",
"Creates a JSON document with the format: {\"row\":<row-id>, \"cells\":[{\"fam\":<col-fam>, \"qual\":<col-val>, \"val\":<value>, \"ts\":<timestamp>}]}.");
static final AllowableValue JSON_FORMAT_QUALIFIER_AND_VALUE = new AllowableValue("col-qual-and-val", "col-qual-and-val",
"Creates a JSON document with the format: {\"<col-qual>\":\"<value>\", \"<col-qual>\":\"<value>\".");
static final PropertyDescriptor JSON_FORMAT = new PropertyDescriptor.Builder()
.name("JSON Format")
.description("Specifies how to represent the HBase row as a JSON document.")
.required(true)
.allowableValues(JSON_FORMAT_FULL_ROW, JSON_FORMAT_QUALIFIER_AND_VALUE)
.defaultValue(JSON_FORMAT_FULL_ROW.getValue())
.build();
static final AllowableValue ENCODING_NONE = new AllowableValue("none", "none", "Creates a String using the bytes of given data and the given Character Set.");
static final AllowableValue ENCODING_BASE64 = new AllowableValue("base64", "base64", "Creates a Base64 encoded String of the given data.");
static final PropertyDescriptor JSON_VALUE_ENCODING = new PropertyDescriptor.Builder()
.name("JSON Value Encoding")
.description("Specifies how to represent row ids, column families, column qualifiers, and values when stored in FlowFile attributes, or written to JSON.")
.required(true)
.allowableValues(ENCODING_NONE, ENCODING_BASE64)
.defaultValue(ENCODING_NONE.getValue())
.build();
static final PropertyDescriptor DECODE_CHARSET = new PropertyDescriptor.Builder()
.name("Decode Character Set")
.description("The character set used to decode data from HBase.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final PropertyDescriptor ENCODE_CHARSET = new PropertyDescriptor.Builder()
.name("Encode Character Set")
.description("The character set used to encode the JSON representation of the row.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All successful fetches are routed to this relationship.")
.build();
static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("All failed fetches are routed to this relationship.")
.build();
static final Relationship REL_NOT_FOUND = new Relationship.Builder()
.name("not found")
.description("All fetches where the row id is not found are routed to this relationship.")
.build();
static final String HBASE_TABLE_ATTR = "hbase.table";
static final String HBASE_ROW_ATTR = "hbase.row";
static final List<PropertyDescriptor> properties;
static {
List<PropertyDescriptor> props = new ArrayList<>();
props.add(HBASE_CLIENT_SERVICE);
props.add(TABLE_NAME);
props.add(ROW_ID);
props.add(COLUMNS);
props.add(AUTHORIZATIONS);
props.add(DESTINATION);
props.add(JSON_FORMAT);
props.add(JSON_VALUE_ENCODING);
props.add(ENCODE_CHARSET);
props.add(DECODE_CHARSET);
properties = Collections.unmodifiableList(props);
}
static final Set<Relationship> relationships;
static {
Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_FAILURE);
rels.add(REL_NOT_FOUND);
relationships = Collections.unmodifiableSet(rels);
}
private volatile Charset decodeCharset;
private volatile Charset encodeCharset;
private volatile RowSerializer regularRowSerializer;
private volatile RowSerializer base64RowSerializer;
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return properties;
}
@Override
public Set<Relationship> getRelationships() {
return relationships;
}
@OnScheduled
public void onScheduled(ProcessContext context) {
this.decodeCharset = Charset.forName(context.getProperty(DECODE_CHARSET).getValue());
this.encodeCharset = Charset.forName(context.getProperty(ENCODE_CHARSET).getValue());
final String jsonFormat = context.getProperty(JSON_FORMAT).getValue();
if (jsonFormat.equals(JSON_FORMAT_FULL_ROW.getValue())) {
this.regularRowSerializer = new JsonFullRowSerializer(decodeCharset, encodeCharset);
this.base64RowSerializer = new JsonFullRowSerializer(decodeCharset, encodeCharset, true);
} else {
this.regularRowSerializer = new JsonQualifierAndValueRowSerializer(decodeCharset, encodeCharset);
this.base64RowSerializer = new JsonQualifierAndValueRowSerializer(decodeCharset, encodeCharset, true);
}
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
if (StringUtils.isBlank(tableName)) {
getLogger().error("Table Name is blank or null for {}, transferring to failure", flowFile);
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
final String rowId = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
if (StringUtils.isBlank(rowId)) {
getLogger().error("Row Identifier is blank or null for {}, transferring to failure", flowFile);
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
final List<Column> columns = getColumns(context.getProperty(COLUMNS).evaluateAttributeExpressions(flowFile).getValue());
final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
final String destination = context.getProperty(DESTINATION).getValue();
final boolean base64Encode = context.getProperty(JSON_VALUE_ENCODING).getValue().equals(ENCODING_BASE64.getValue());
List<String> authorizations = getAuthorizations(context, flowFile);
final RowSerializer rowSerializer = base64Encode ? base64RowSerializer : regularRowSerializer;
final FetchHBaseRowHandler handler = destination.equals(DESTINATION_CONTENT.getValue())
? new FlowFileContentHandler(flowFile, session, rowSerializer) : new FlowFileAttributeHandler(flowFile, session, rowSerializer);
final byte[] rowIdBytes = rowId.getBytes(StandardCharsets.UTF_8);
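        // Using the row id as both the start and end row limits the scan to exactly the requested row,
        // which is how a single-row fetch is expressed through the scan-based client service API.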
try {
hBaseClientService.scan(tableName, rowIdBytes, rowIdBytes, columns, authorizations, handler);
} catch (Exception e) {
getLogger().error("Unable to fetch row {} from {}", rowId, tableName, e);
session.transfer(handler.getFlowFile(), REL_FAILURE);
return;
}
FlowFile handlerFlowFile = handler.getFlowFile();
if (!handler.handledRow()) {
getLogger().debug("Row {} not found in {}, transferring to not found", rowId, tableName);
session.transfer(handlerFlowFile, REL_NOT_FOUND);
return;
}
if (getLogger().isDebugEnabled()) {
getLogger().debug("Fetched {} from {} with row id {}", handlerFlowFile, tableName, rowId);
}
final Map<String, String> attributes = new HashMap<>();
attributes.put(HBASE_TABLE_ATTR, tableName);
if (destination.equals(DESTINATION_CONTENT.getValue())) {
attributes.put(CoreAttributes.MIME_TYPE.key(), "application/json");
}
handlerFlowFile = session.putAllAttributes(handlerFlowFile, attributes);
final String transitUri = hBaseClientService.toTransitUri(tableName, rowId);
        // Regardless of where the result is written, emit a fetch provenance event.
session.getProvenanceReporter().fetch(handlerFlowFile, transitUri);
if (!destination.equals(DESTINATION_CONTENT.getValue())) {
session.getProvenanceReporter().modifyAttributes(handlerFlowFile, "Added attributes to FlowFile from " + transitUri);
}
session.transfer(handlerFlowFile, REL_SUCCESS);
}
/**
* @param columnsValue a String in the form colFam:colQual,colFam:colQual
* @return a list of Columns based on parsing the given String
*/
private List<Column> getColumns(final String columnsValue) {
final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
List<Column> columnsList = new ArrayList<>(columns.length);
for (final String column : columns) {
if (column.contains(":")) {
final String[] parts = column.split(":");
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, cq));
} else {
final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, null));
}
}
return columnsList;
}
/**
* A ResultHandler that also provides access to a resulting FlowFile reference.
*/
private interface FetchHBaseRowHandler extends ResultHandler {
/**
         * @return the FlowFile reference that was used by this handler
*/
FlowFile getFlowFile();
/**
         * @return true if this handler handled a row
*/
boolean handledRow();
}
/**
* A FetchHBaseRowHandler that writes the resulting row to the FlowFile content.
*/
private static class FlowFileContentHandler implements FetchHBaseRowHandler {
private FlowFile flowFile;
private final ProcessSession session;
private final RowSerializer serializer;
private boolean handledRow = false;
public FlowFileContentHandler(final FlowFile flowFile, final ProcessSession session, final RowSerializer serializer) {
this.flowFile = flowFile;
this.session = session;
this.serializer = serializer;
}
@Override
public void handle(byte[] row, ResultCell[] resultCells) {
flowFile = session.write(flowFile, (out) -> {
serializer.serialize(row, resultCells, out);
});
handledRow = true;
}
@Override
public FlowFile getFlowFile() {
return flowFile;
}
@Override
public boolean handledRow() {
return handledRow;
}
}
/**
* A FetchHBaseRowHandler that writes the resulting row to FlowFile attributes.
*/
private static class FlowFileAttributeHandler implements FetchHBaseRowHandler {
private FlowFile flowFile;
private final ProcessSession session;
private final RowSerializer rowSerializer;
private boolean handledRow = false;
public FlowFileAttributeHandler(final FlowFile flowFile, final ProcessSession session, final RowSerializer serializer) {
this.flowFile = flowFile;
this.session = session;
this.rowSerializer = serializer;
}
@Override
public void handle(byte[] row, ResultCell[] resultCells) {
final String serializedRow = rowSerializer.serialize(row, resultCells);
flowFile = session.putAttribute(flowFile, HBASE_ROW_ATTR, serializedRow);
handledRow = true;
}
@Override
public FlowFile getFlowFile() {
return flowFile;
}
@Override
public boolean handledRow() {
return handledRow;
}
}
}

View File

@ -1,519 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.Stateful;
import org.apache.nifi.annotation.behavior.TriggerSerially;
import org.apache.nifi.annotation.behavior.TriggerWhenEmpty;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.configuration.DefaultSchedule;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateMap;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.io.JsonRowSerializer;
import org.apache.nifi.hbase.io.RowSerializer;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.migration.PropertyConfiguration;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.nifi.scheduling.SchedulingStrategy;
@TriggerWhenEmpty
@TriggerSerially
@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
@Tags({"hbase", "get", "ingest"})
@CapabilityDescription("This Processor polls HBase for any records in the specified table. The processor keeps track of the timestamp of the cells that "
+ "it receives, so that as new records are pushed to HBase, they will automatically be pulled. Each record is output in JSON format, as "
+ "{\"row\": \"<row key>\", \"cells\": { \"<column 1 family>:<column 1 qualifier>\": \"<cell 1 value>\", \"<column 2 family>:<column 2 qualifier>\": \"<cell 2 value>\", ... }}. "
+ "For each record received, a Provenance RECEIVE event is emitted with the format hbase://<table name>/<row key>, where <row key> is the UTF-8 encoded value of the row's key.")
@WritesAttributes({
@WritesAttribute(attribute = "hbase.table", description = "The name of the HBase table that the data was pulled from"),
@WritesAttribute(attribute = "mime.type", description = "Set to application/json to indicate that output is JSON")
})
@Stateful(scopes = Scope.CLUSTER, description = "After performing a fetching from HBase, stores a timestamp of the last-modified cell that was found. In addition, it stores the ID of the row(s) "
+ "and the value of each cell that has that timestamp as its modification date. This is stored across the cluster and allows the next fetch to avoid duplicating data, even if this Processor is "
+ "run on Primary Node only and the Primary Node changes.")
@DefaultSchedule(strategy = SchedulingStrategy.TIMER_DRIVEN, period = "1 min")
public class GetHBase extends AbstractProcessor implements VisibilityFetchSupport {
static final Pattern COLUMNS_PATTERN = Pattern.compile("\\w+(:\\w+)?(?:,\\w+(:\\w+)?)*");
static final AllowableValue NONE = new AllowableValue("None", "None");
static final AllowableValue CURRENT_TIME = new AllowableValue("Current Time", "Current Time");
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
.name("Character Set")
.description("Specifies which character set is used to encode the data in HBase")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("Table Name")
.description("The name of the HBase Table to put data into")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor COLUMNS = new PropertyDescriptor.Builder()
.name("Columns")
.description("A comma-separated list of \"<colFamily>:<colQualifier>\" pairs to return when scanning. To return all columns " +
"for a given family, leave off the qualifier such as \"<colFamily1>,<colFamily2>\".")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.createRegexMatchingValidator(COLUMNS_PATTERN))
.build();
static final PropertyDescriptor FILTER_EXPRESSION = new PropertyDescriptor.Builder()
.name("Filter Expression")
.description("An HBase filter expression that will be applied to the scan. This property can not be used when also using the Columns property.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor INITIAL_TIMERANGE = new PropertyDescriptor.Builder()
.name("Initial Time Range")
.description("The time range to use on the first scan of a table. None will pull the entire table on the first scan, " +
"Current Time will pull entries from that point forward.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues(NONE, CURRENT_TIME)
.defaultValue(NONE.getValue())
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All FlowFiles are routed to this relationship")
.build();
private final AtomicReference<ScanResult> lastResult = new AtomicReference<>();
private final List<Column> columns = new ArrayList<>();
private volatile String previousTable = null;
@Override
public Set<Relationship> getRelationships() {
return Collections.singleton(REL_SUCCESS);
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(COLUMNS);
properties.add(AUTHORIZATIONS);
properties.add(FILTER_EXPRESSION);
properties.add(INITIAL_TIMERANGE);
properties.add(CHARSET);
return properties;
}
@Override
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
final String columns = validationContext.getProperty(COLUMNS).evaluateAttributeExpressions().getValue();
final String filter = validationContext.getProperty(FILTER_EXPRESSION).evaluateAttributeExpressions().getValue();
final List<ValidationResult> problems = new ArrayList<>();
if (!StringUtils.isBlank(columns) && !StringUtils.isBlank(filter)) {
problems.add(new ValidationResult.Builder()
.subject(FILTER_EXPRESSION.getDisplayName())
.input(filter).valid(false)
.explanation("a filter expression can not be used in conjunction with the Columns property")
.build());
}
return problems;
}
@Override
public void onPropertyModified(final PropertyDescriptor descriptor, final String oldValue, final String newValue) {
if (descriptor.equals(TABLE_NAME)) {
lastResult.set(null);
}
}
@Override
public void migrateProperties(PropertyConfiguration config) {
super.migrateProperties(config);
config.removeProperty("Distributed Cache Service");
}
@OnScheduled
public void parseColumns(final ProcessContext context) throws IOException {
final String columnsValue = context.getProperty(COLUMNS).evaluateAttributeExpressions().getValue();
final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
this.columns.clear();
for (final String column : columns) {
if (column.contains(":")) {
final String[] parts = column.split(":");
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
this.columns.add(new Column(cf, cq));
} else {
final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
this.columns.add(new Column(cf, null));
}
}
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
final String initialTimeRange = context.getProperty(INITIAL_TIMERANGE).getValue();
final String filterExpression = context.getProperty(FILTER_EXPRESSION).evaluateAttributeExpressions().getValue();
List<String> authorizations = getAuthorizations(context, null);
final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
// if the table was changed then remove any previous state
if (previousTable != null && !tableName.equals(previousTable)) {
try {
session.clearState(Scope.CLUSTER);
} catch (final IOException ioe) {
getLogger().warn("Failed to clear Cluster State", ioe);
}
previousTable = tableName;
}
try {
final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions().getValue());
final RowSerializer serializer = new JsonRowSerializer(charset);
this.lastResult.set(getState(session));
final long defaultMinTime = (initialTimeRange.equals(NONE.getValue()) ? 0L : System.currentTimeMillis());
final long minTime = (lastResult.get() == null ? defaultMinTime : lastResult.get().getTimestamp());
final Map<String, Set<String>> cellsMatchingTimestamp = new HashMap<>();
final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
final AtomicReference<Long> latestTimestampHolder = new AtomicReference<>(minTime);
hBaseClientService.scan(tableName, columns, filterExpression, minTime, authorizations, (rowKey, resultCells) -> {
final String rowKeyString = new String(rowKey, StandardCharsets.UTF_8);
// check if latest cell timestamp is equal to our cutoff.
// if any of the cells have a timestamp later than our cutoff, then we
// want the row. But if the cell with the latest timestamp is equal to
// our cutoff, then we want to check if that's one of the cells that
// we have already seen.
long latestCellTimestamp = 0L;
for (final ResultCell cell : resultCells) {
if (cell.getTimestamp() > latestCellTimestamp) {
latestCellTimestamp = cell.getTimestamp();
}
}
// we've already seen this.
if (latestCellTimestamp < minTime) {
getLogger().debug("latest cell timestamp for row {} is {}, which is earlier than the minimum time of {}",
new Object[] {rowKeyString, latestCellTimestamp, minTime});
return;
}
if (latestCellTimestamp == minTime) {
// latest cell timestamp is equal to our minimum time. Check if all cells that have
// that timestamp are in our list of previously seen cells.
boolean allSeen = true;
for (final ResultCell cell : resultCells) {
if (cell.getTimestamp() == latestCellTimestamp) {
final ScanResult latestResult = lastResult.get();
if (latestResult == null || !latestResult.contains(cell)) {
allSeen = false;
break;
}
}
}
if (allSeen) {
// we have already seen all of the cells for this row. We do not want to
// include this cell in our output.
getLogger().debug("all cells for row {} have already been seen", rowKeyString);
return;
}
}
// If the latest timestamp of the cell is later than the latest timestamp we have already seen,
// we want to keep track of the cells that match this timestamp so that the next time we scan,
// we can ignore these cells.
if (latestCellTimestamp >= latestTimestampHolder.get()) {
// new timestamp, so clear all of the 'matching cells'
if (latestCellTimestamp > latestTimestampHolder.get()) {
latestTimestampHolder.set(latestCellTimestamp);
cellsMatchingTimestamp.clear();
}
for (final ResultCell cell : resultCells) {
final long ts = cell.getTimestamp();
if (ts == latestCellTimestamp) {
final byte[] rowValue = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength() + cell.getRowOffset());
final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
Set<String> cellHashes = cellsMatchingTimestamp.computeIfAbsent(rowHash, k -> new HashSet<>());
cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
}
}
}
// write the row to a new FlowFile.
FlowFile flowFile = session.create();
flowFile = session.write(flowFile, out -> serializer.serialize(rowKey, resultCells, out));
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", tableName);
attributes.put("mime.type", "application/json");
flowFile = session.putAllAttributes(flowFile, attributes);
session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, rowKeyString));
session.transfer(flowFile, REL_SUCCESS);
getLogger().debug("Received {} from HBase with row key {}", flowFile, rowKeyString);
// we could potentially have a huge number of rows. If we get to 500, go ahead and commit the
// session so that we can avoid buffering tons of FlowFiles without ever sending any out.
                // increment the running count first so the batch check below only counts each row once
                long rowsPulled = rowsPulledHolder.get() + 1;
                rowsPulledHolder.set(rowsPulled);
                if (rowsPulled % getBatchSize() == 0) {
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
}
});
updateStateAndCommit(session, latestTimestampHolder.get(), cellsMatchingTimestamp);
} catch (final IOException e) {
getLogger().error("Failed to receive data from HBase due to {}", e);
session.rollback();
} finally {
// if we failed, we want to yield so that we don't hammer hbase. If we succeed, then we have
// pulled all of the records, so we want to wait a bit before hitting hbase again anyway.
context.yield();
}
}
private void updateStateAndCommit(final ProcessSession session, final long latestTimestamp, final Map<String, Set<String>> cellsMatchingTimestamp) throws IOException {
final ScanResult scanResults = new ScanResult(latestTimestamp, cellsMatchingTimestamp);
final ScanResult latestResult = lastResult.get();
if (latestResult == null || scanResults.getTimestamp() > latestResult.getTimestamp()) {
session.setState(scanResults.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResults));
} else if (scanResults.getTimestamp() == latestResult.getTimestamp()) {
final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
            // copy the results of scanResults.getMatchingCells() to combinedResults,
            // doing a deep copy because the Sets may be modified below.
for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
            // combine in the results from 'latestResult'
for (final Map.Entry<String, Set<String>> entry : latestResult.getMatchingCells().entrySet()) {
final Set<String> existing = combinedResults.get(entry.getKey());
if (existing == null) {
combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
} else {
existing.addAll(entry.getValue());
}
}
final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
session.setState(scanResult.toFlatMap(), Scope.CLUSTER);
session.commitAsync(() -> updateScanResultsIfNewer(scanResult));
}
}
private void updateScanResultsIfNewer(final ScanResult scanResult) {
lastResult.getAndUpdate(current -> (current == null || scanResult.getTimestamp() > current.getTimestamp()) ? scanResult : current);
}
// present for tests
protected int getBatchSize() {
return 500;
}
protected List<Column> getColumns() {
return columns;
}
private ScanResult getState(final ProcessSession session) throws IOException {
final StateMap stateMap = session.getState(Scope.CLUSTER);
if (!stateMap.getStateVersion().isPresent()) {
return null;
}
return ScanResult.fromFlatMap(stateMap.toMap());
}
public static class ScanResult implements Serializable {
private static final long serialVersionUID = 1L;
private final long latestTimestamp;
private final Map<String, Set<String>> matchingCellHashes;
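        // Matches flattened state keys of the form "row.<rowIndex>" (a row id entry) or
        // "row.<rowIndex>.<cellIndex>" (a cell value entry), e.g. "row.0" or "row.0.3".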
private static final Pattern CELL_ID_PATTERN = Pattern.compile(Pattern.quote(StateKeys.ROW_ID_PREFIX) + "(\\d+)(\\.(\\d+))?");
public static class StateKeys {
public static final String TIMESTAMP = "timestamp";
public static final String ROW_ID_PREFIX = "row.";
}
public ScanResult(final long timestamp, final Map<String, Set<String>> cellHashes) {
latestTimestamp = timestamp;
matchingCellHashes = cellHashes;
}
public long getTimestamp() {
return latestTimestamp;
}
public Map<String, Set<String>> getMatchingCells() {
return matchingCellHashes;
}
public boolean contains(final ResultCell cell) {
if (cell.getTimestamp() != latestTimestamp) {
return false;
}
final byte[] row = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength() + cell.getRowOffset());
final String rowHash = new String(row, StandardCharsets.UTF_8);
final Set<String> cellHashes = matchingCellHashes.get(rowHash);
if (cellHashes == null) {
return false;
}
final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
final String cellHash = new String(cellValue, StandardCharsets.UTF_8);
return cellHashes.contains(cellHash);
}
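        // Illustrative flattened state for one row "r1" with two matching cell values at
        // timestamp 1700000000000 (values are made up):
        //   timestamp -> "1700000000000"
        //   row.0     -> "r1"
        //   row.0.0   -> "cellValueA"
        //   row.0.1   -> "cellValueB"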
public Map<String, String> toFlatMap() {
final Map<String, String> map = new HashMap<>();
map.put(StateKeys.TIMESTAMP, String.valueOf(latestTimestamp));
int rowCounter = 0;
for (final Map.Entry<String, Set<String>> entry : matchingCellHashes.entrySet()) {
final String rowId = entry.getKey();
final String rowIdKey = StateKeys.ROW_ID_PREFIX + rowCounter;
final String cellKeyPrefix = rowIdKey + ".";
map.put(rowIdKey, rowId);
final Set<String> cellValues = entry.getValue();
int cellCounter = 0;
for (final String cellValue : cellValues) {
final String cellId = cellKeyPrefix + (cellCounter++);
map.put(cellId, cellValue);
}
rowCounter++;
}
return map;
}
public static ScanResult fromFlatMap(final Map<String, String> map) {
if (map == null) {
return null;
}
final String timestampValue = map.get(StateKeys.TIMESTAMP);
if (timestampValue == null) {
return null;
}
final long timestamp = Long.parseLong(timestampValue);
final Map<String, Set<String>> rowIndexToMatchingCellHashes = new HashMap<>();
final Map<String, String> rowIndexToId = new HashMap<>();
for (final Map.Entry<String, String> entry : map.entrySet()) {
final String key = entry.getKey();
final Matcher matcher = CELL_ID_PATTERN.matcher(key);
if (!matcher.matches()) {
// if it's not a valid key, move on.
continue;
}
final String rowIndex = matcher.group(1);
final String cellIndex = matcher.group(3);
Set<String> cellHashes = rowIndexToMatchingCellHashes.computeIfAbsent(rowIndex, k -> new HashSet<>());
if (cellIndex == null) {
// this provides a Row ID.
rowIndexToId.put(rowIndex, entry.getValue());
} else {
cellHashes.add(entry.getValue());
}
}
final Map<String, Set<String>> matchingCellHashes = new HashMap<>(rowIndexToMatchingCellHashes.size());
for (final Map.Entry<String, Set<String>> entry : rowIndexToMatchingCellHashes.entrySet()) {
final String rowIndex = entry.getKey();
final String rowId = rowIndexToId.get(rowIndex);
final Set<String> cellValues = entry.getValue();
matchingCellHashes.put(rowId, cellValues);
}
return new ScanResult(timestamp, matchingCellHashes);
}
}
}

View File

@ -1,151 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.scan.HBaseRegion;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
@Tags({"hbase", "regions", "scan", "rowkey"})
@CapabilityDescription("Returns the information about the regions of an HBase table, including ID, name and row key ranges. " +
"This information is helpful to feed into start row key and end row key for scans to HBase, e.g. using the ScanHBase processor.")
@WritesAttributes({
@WritesAttribute(attribute = "hbase.region.name", description = "The name of the HBase region."),
@WritesAttribute(attribute = "hbase.region.id", description = "The id of the HBase region."),
@WritesAttribute(attribute = "hbase.region.startRowKey", description = "The starting row key (inclusive) of the HBase region. " +
"The bytes returned from HBase is converted into a UTF-8 encoded string."),
@WritesAttribute(attribute = "hbase.region.endRowKey", description = "The ending row key (exclusive) of the HBase region. " +
"The bytes returned from HBase is converted into a UTF-8 encoded string.")
})
public class ListHBaseRegions extends AbstractProcessor {
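    // Each region becomes one FlowFile whose attributes carry the region metadata: hbase.region.name,
    // hbase.region.id, hbase.region.startRowKey and hbase.region.endRowKey. A null start or end key,
    // as the first and last regions of a table typically have, is written as an empty string.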
static final String HBASE_REGION_NAME_ATTR = "hbase.region.name";
static final String HBASE_REGION_ID_ATTR = "hbase.region.id";
static final String HBASE_REGION_START_ROW_ATTR = "hbase.region.startRowKey";
static final String HBASE_REGION_END_ROW_ATTR = "hbase.region.endRowKey";
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("Table Name")
.description("The name of the HBase Table to put data into")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor ROUTE_DEGENERATE_REGIONS = new PropertyDescriptor.Builder()
.name("Route Degenerate Regions")
.required(false)
.defaultValue("false")
.allowableValues("true", "false")
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("FlowFiles with information on regions of the HBase table are routed to this relationship.")
.build();
static final Relationship REL_DEGENERATE = new Relationship.Builder()
.name("degenerate")
.description("If \\\"Route Degenerate Regions\\\" is set, any " +
"FlowFile(s) that contains information about a region that is degenerate will be routed " +
"to this relationship. Otherwise, they will be sent to the success relationship.")
.autoTerminateDefault(true)
.build();
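    // A degenerate region is one whose reported row key range is inconsistent, e.g. an end row key
    // that sorts before its start row key; HBaseRegion.isDegenerate() surfaces that condition below.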
@Override
public Set<Relationship> getRelationships() {
return Set.of(REL_SUCCESS, REL_DEGENERATE);
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return List.of(
HBASE_CLIENT_SERVICE,
TABLE_NAME,
ROUTE_DEGENERATE_REGIONS
);
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
if (StringUtils.isBlank(tableName)) {
getLogger().error("Table Name is blank or null, no regions information to be fetched.");
context.yield();
return;
}
final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
final boolean routeDegenerateRegions = context.getProperty(ROUTE_DEGENERATE_REGIONS).asBoolean();
try {
final List<HBaseRegion> hBaseRegions = hBaseClientService.listHBaseRegions(tableName);
for (final HBaseRegion region : hBaseRegions) {
                // putAttribute returns a new FlowFile reference; keep the latest one so the attributes are retained
                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, HBASE_REGION_NAME_ATTR, region.getRegionName());
                flowFile = session.putAttribute(flowFile, HBASE_REGION_ID_ATTR, String.valueOf(region.getRegionId()));
                if (region.getStartRowKey() == null) {
                    flowFile = session.putAttribute(flowFile, HBASE_REGION_START_ROW_ATTR, "");
                } else {
                    flowFile = session.putAttribute(flowFile, HBASE_REGION_START_ROW_ATTR, new String(region.getStartRowKey(), StandardCharsets.UTF_8));
                }
                if (region.getEndRowKey() == null) {
                    flowFile = session.putAttribute(flowFile, HBASE_REGION_END_ROW_ATTR, "");
                } else {
                    flowFile = session.putAttribute(flowFile, HBASE_REGION_END_ROW_ATTR, new String(region.getEndRowKey(), StandardCharsets.UTF_8));
                }
if (region.isDegenerate() && routeDegenerateRegions) {
getLogger().warn("Region with id {} and name {} is degenerate. Routing to degenerate relationship.", region.getRegionId(), region.getRegionName());
session.transfer(flowFile, REL_DEGENERATE);
} else {
session.transfer(flowFile, REL_SUCCESS);
}
}
} catch (final HBaseClientException e) {
getLogger().error("Failed to receive information on HBase regions for table {} due to {}", tableName, e);
context.yield();
throw new RuntimeException(e);
}
}
}

View File

@ -1,115 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.SystemResource;
import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StringUtils;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.apache.nifi.hbase.util.VisibilityUtil.pickVisibilityString;
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hadoop", "hbase"})
@CapabilityDescription("Adds the Contents of a FlowFile to HBase as the value of a single cell")
@SystemResourceConsideration(resource = SystemResource.MEMORY)
public class PutHBaseCell extends AbstractPutHBase {
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(ROW_ID);
properties.add(ROW_ID_ENCODING_STRATEGY);
properties.add(COLUMN_FAMILY);
properties.add(COLUMN_QUALIFIER);
properties.add(TIMESTAMP);
properties.add(BATCH_SIZE);
return properties;
}
@Override
public Set<Relationship> getRelationships() {
final Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_FAILURE);
return rels;
}
@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String row = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
final String columnQualifier = context.getProperty(COLUMN_QUALIFIER).evaluateAttributeExpressions(flowFile).getValue();
final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
final String visibilityStringToUse = pickVisibilityString(columnFamily, columnQualifier, flowFile, context);
final Long timestamp;
if (!StringUtils.isBlank(timestampValue)) {
try {
timestamp = Long.valueOf(timestampValue);
} catch (Exception e) {
getLogger().error("Invalid timestamp value: {}", timestampValue, e);
return null;
}
} else {
timestamp = null;
}
final byte[] buffer = new byte[(int) flowFile.getSize()];
session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));
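        // The entire FlowFile content is buffered into memory here and becomes the value of the single
        // cell, which is why the processor declares a MEMORY SystemResourceConsideration.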
PutColumn column = StringUtils.isEmpty(visibilityStringToUse)
? new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8), buffer, timestamp)
: new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8), buffer, timestamp, visibilityStringToUse);
final Collection<PutColumn> columns = Collections.singletonList(column);
byte[] rowKeyBytes = getRow(row, context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue());
return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
}

View File

@ -1,300 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import static org.apache.nifi.hbase.util.VisibilityUtil.pickVisibilityString;
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hadoop", "hbase", "put", "json"})
@CapabilityDescription("Adds rows to HBase based on the contents of incoming JSON documents. Each FlowFile must contain a single " +
"UTF-8 encoded JSON document, and any FlowFiles where the root element is not a single document will be routed to failure. " +
"Each JSON field name and value will become a column qualifier and value of the HBase row. Any fields with a null value " +
"will be skipped, and fields with a complex value will be handled according to the Complex Field Strategy. " +
"The row id can be specified either directly on the processor through the Row Identifier property, or can be extracted from the JSON " +
"document by specifying the Row Identifier Field Name property. This processor will hold the contents of all FlowFiles for the given batch " +
"in memory at one time.")
public class PutHBaseJSON extends AbstractPutHBase {
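    // Illustrative example: with Column Family "cf", Row Identifier Field Name "id" and the default
    // String field encoding, the document {"id":"r1","name":"apple","count":2} produces row key "r1"
    // with columns cf:name="apple" and cf:count="2"; the "id" field itself is not written as a column.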
protected static final PropertyDescriptor ROW_FIELD_NAME = new PropertyDescriptor.Builder()
.name("Row Identifier Field Name")
.description("Specifies the name of a JSON element whose value should be used as the row id for the given JSON document.")
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final String FAIL_VALUE = "Fail";
protected static final String WARN_VALUE = "Warn";
protected static final String IGNORE_VALUE = "Ignore";
protected static final String TEXT_VALUE = "Text";
protected static final AllowableValue COMPLEX_FIELD_FAIL = new AllowableValue(FAIL_VALUE, FAIL_VALUE, "Route entire FlowFile to failure if any elements contain complex values.");
protected static final AllowableValue COMPLEX_FIELD_WARN = new AllowableValue(WARN_VALUE, WARN_VALUE, "Provide a warning and do not include field in row sent to HBase.");
protected static final AllowableValue COMPLEX_FIELD_IGNORE = new AllowableValue(IGNORE_VALUE, IGNORE_VALUE, "Silently ignore and do not include in row sent to HBase.");
protected static final AllowableValue COMPLEX_FIELD_TEXT = new AllowableValue(TEXT_VALUE, TEXT_VALUE, "Use the string representation of the complex field as the value of the given column.");
protected static final PropertyDescriptor COMPLEX_FIELD_STRATEGY = new PropertyDescriptor.Builder()
.name("Complex Field Strategy")
.description("Indicates how to handle complex fields, i.e. fields that do not have a single text value.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.required(true)
.allowableValues(COMPLEX_FIELD_FAIL, COMPLEX_FIELD_WARN, COMPLEX_FIELD_IGNORE, COMPLEX_FIELD_TEXT)
.defaultValue(COMPLEX_FIELD_TEXT.getValue())
.build();
protected static final AllowableValue FIELD_ENCODING_STRING = new AllowableValue(STRING_ENCODING_VALUE, STRING_ENCODING_VALUE,
"Stores the value of each field as a UTF-8 String.");
protected static final AllowableValue FIELD_ENCODING_BYTES = new AllowableValue(BYTES_ENCODING_VALUE, BYTES_ENCODING_VALUE,
"Stores the value of each field as the byte representation of the type derived from the JSON.");
protected static final PropertyDescriptor FIELD_ENCODING_STRATEGY = new PropertyDescriptor.Builder()
.name("Field Encoding Strategy")
.description(("Indicates how to store the value of each field in HBase. The default behavior is to convert each value from the " +
"JSON to a String, and store the UTF-8 bytes. Choosing Bytes will interpret the type of each field from " +
"the JSON, and convert the value to the byte representation of that type, meaning an integer will be stored as the " +
"byte representation of that integer."))
.required(true)
.allowableValues(FIELD_ENCODING_STRING, FIELD_ENCODING_BYTES)
.defaultValue(FIELD_ENCODING_STRING.getValue())
.build();
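    // For example, the JSON value 42 is stored as the UTF-8 bytes of "42" under String encoding, while
    // Bytes encoding stores clientService.toBytes(42L), the byte representation of the long value
    // (an 8-byte encoding with the standard HBase Bytes utility).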
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(ROW_ID);
properties.add(ROW_FIELD_NAME);
properties.add(ROW_ID_ENCODING_STRATEGY);
properties.add(COLUMN_FAMILY);
properties.add(TIMESTAMP);
properties.add(BATCH_SIZE);
properties.add(COMPLEX_FIELD_STRATEGY);
properties.add(FIELD_ENCODING_STRATEGY);
return properties;
}
@Override
public Set<Relationship> getRelationships() {
final Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_FAILURE);
return rels;
}
@Override
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
final Collection<ValidationResult> results = new ArrayList<>();
final String rowId = validationContext.getProperty(ROW_ID).getValue();
final String rowFieldName = validationContext.getProperty(ROW_FIELD_NAME).getValue();
if (StringUtils.isBlank(rowId) && StringUtils.isBlank(rowFieldName)) {
results.add(new ValidationResult.Builder()
.subject(this.getClass().getSimpleName())
.explanation("Row Identifier or Row Identifier Field Name is required")
.valid(false)
.build());
} else if (!StringUtils.isBlank(rowId) && !StringUtils.isBlank(rowFieldName)) {
results.add(new ValidationResult.Builder()
.subject(this.getClass().getSimpleName())
.explanation("Row Identifier and Row Identifier Field Name can not be used together")
.valid(false)
.build());
}
return results;
}
@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String rowId = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
final boolean extractRowId = !StringUtils.isBlank(rowFieldName);
final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
final String rowIdEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
final Long timestamp;
if (!StringUtils.isBlank(timestampValue)) {
try {
timestamp = Long.valueOf(timestampValue);
} catch (Exception e) {
getLogger().error("Invalid timestamp value: {}", timestampValue, e);
return null;
}
} else {
timestamp = null;
}
// Parse the JSON document
final ObjectMapper mapper = new ObjectMapper();
final AtomicReference<JsonNode> rootNodeRef = new AtomicReference<>(null);
try {
session.read(flowFile, in -> {
try (final InputStream bufferedIn = new BufferedInputStream(in)) {
rootNodeRef.set(mapper.readTree(bufferedIn));
}
});
} catch (final ProcessException pe) {
getLogger().error("Failed to parse {} as JSON; routing to failure", flowFile, pe);
return null;
}
final JsonNode rootNode = rootNodeRef.get();
if (rootNode.isArray()) {
getLogger().error("Root node of JSON must be a single document, found array for {}; routing to failure", flowFile);
return null;
}
final Collection<PutColumn> columns = new ArrayList<>();
final AtomicReference<String> rowIdHolder = new AtomicReference<>(null);
// convert each field/value to a column for the put, skip over nulls and arrays
final Iterator<String> fieldNames = rootNode.fieldNames();
while (fieldNames.hasNext()) {
final String fieldName = fieldNames.next();
final AtomicReference<byte[]> fieldValueHolder = new AtomicReference<>(null);
final JsonNode fieldNode = rootNode.get(fieldName);
if (fieldNode.isNull()) {
getLogger().debug("Skipping {} because value was null", fieldName);
} else if (fieldNode.isValueNode()) {
// for a value node we need to determine if we are storing the bytes of a string, or the bytes of actual types
if (STRING_ENCODING_VALUE.equals(fieldEncodingStrategy)) {
final byte[] valueBytes = clientService.toBytes(fieldNode.asText());
fieldValueHolder.set(valueBytes);
} else {
fieldValueHolder.set(extractJNodeValue(fieldNode));
}
} else {
// for non-null, non-value nodes, determine what to do based on the handling strategy
switch (complexFieldStrategy) {
case FAIL_VALUE:
getLogger().error("Complex value found for {}; routing to failure", fieldName);
return null;
case WARN_VALUE:
getLogger().warn("Complex value found for {}; skipping", fieldName);
break;
case TEXT_VALUE:
// use toString() here because asText() is only guaranteed to be supported on value nodes
// some other types of nodes, like ArrayNode, provide toString implementations
fieldValueHolder.set(clientService.toBytes(fieldNode.toString()));
break;
case IGNORE_VALUE:
// silently skip
break;
default:
break;
}
}
// if we have a field value, then see if this is the row id field, if so store the value for later
// otherwise add a new column where the fieldName and fieldValue are the column qualifier and value
if (fieldValueHolder.get() != null) {
if (extractRowId && fieldName.equals(rowFieldName)) {
rowIdHolder.set(fieldNode.asText());
} else {
final byte[] colFamBytes = columnFamily.getBytes(StandardCharsets.UTF_8);
final byte[] colQualBytes = fieldName.getBytes(StandardCharsets.UTF_8);
final byte[] colValBytes = fieldValueHolder.get();
final String visibilityStringToUse = pickVisibilityString(columnFamily, fieldName, flowFile, context);
PutColumn column = StringUtils.isEmpty(visibilityStringToUse)
? new PutColumn(colFamBytes, colQualBytes, colValBytes, timestamp)
: new PutColumn(colFamBytes, colQualBytes, colValBytes, timestamp, visibilityStringToUse);
columns.add(column);
}
}
}
// if we are expecting a field name to use for the row id and the incoming document doesn't have it
// log an error message so the user can see what the field names were and return null so it gets routed to failure
if (extractRowId && rowIdHolder.get() == null) {
final String fieldNameStr = StringUtils.join(rootNode.fieldNames(), ",");
getLogger().error("Row ID field named '{}' not found in field names '{}'; routing to failure", rowFieldName, fieldNameStr);
return null;
}
final String putRowId = (extractRowId ? rowIdHolder.get() : rowId);
byte[] rowKeyBytes = getRow(putRowId, rowIdEncodingStrategy);
return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
/*
 * Handles the conversion of the JsonNode value into its correct underlying data type, in the form of a byte array, as expected by the columns.add function.
 */
private byte[] extractJNodeValue(final JsonNode n) {
if (n.isBoolean()) {
//boolean
return clientService.toBytes(n.asBoolean());
} else if (n.isNumber()) {
if (n.isIntegralNumber()) {
//interpret as Long
return clientService.toBytes(n.asLong());
} else {
//interpret as Double
return clientService.toBytes(n.asDouble());
}
} else {
//if all else fails, interpret as String
return clientService.toBytes(n.asText());
}
}
}

View File

@ -1,475 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.ReadsAttribute;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.components.Validator;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.util.VisibilityUtil;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.record.path.FieldValue;
import org.apache.nifi.record.path.RecordPath;
import org.apache.nifi.record.path.RecordPathResult;
import org.apache.nifi.record.path.util.RecordPathCache;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.util.IllegalTypeConversionException;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@SupportsBatching
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hadoop", "hbase", "put", "record"})
@CapabilityDescription("Adds rows to HBase based on the contents of a flowfile using a configured record reader.")
@ReadsAttribute(attribute = "restart.index", description = "Reads restart.index when it needs to replay part of a record set that did not get into HBase.")
@WritesAttribute(attribute = "restart.index", description = "Writes restart.index when a batch fails to be insert into HBase")
public class PutHBaseRecord extends AbstractPutHBase {
protected static final PropertyDescriptor ROW_FIELD_NAME = new PropertyDescriptor.Builder()
.name("Row Identifier Field Name")
.description("Specifies the name of a record field whose value should be used as the row id for the given record.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor TIMESTAMP_FIELD_NAME = new PropertyDescriptor.Builder()
.name("timestamp-field-name")
.displayName("Timestamp Field Name")
.description("Specifies the name of a record field whose value should be used as the timestamp for the cells in HBase. " +
"The value of this field must be a number, string, or date that can be converted to a long. " +
"If this field is left blank, HBase will use the current time.")
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
protected static final PropertyDescriptor DEFAULT_VISIBILITY_STRING = new PropertyDescriptor.Builder()
.name("hbase-default-vis-string")
.displayName("Default Visibility String")
.description("When using visibility labels, any value set in this field will be applied to all cells that are written unless " +
"an attribute with the convention \"visibility.COLUMN_FAMILY.COLUMN_QUALIFIER\" is present on the flowfile. If this field " +
"is left blank, it will be assumed that no visibility is to be set unless visibility-related attributes are set. NOTE: " +
"this configuration will have no effect on your data if you have not enabled visibility labels in the HBase cluster.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(Validator.VALID)
.build();
protected static final String FAIL_VALUE = "Fail";
protected static final String WARN_VALUE = "Warn";
protected static final String IGNORE_VALUE = "Ignore";
protected static final String TEXT_VALUE = "Text";
protected static final AllowableValue COMPLEX_FIELD_FAIL = new AllowableValue(FAIL_VALUE, FAIL_VALUE, "Route entire FlowFile to failure if any elements contain complex values.");
protected static final AllowableValue COMPLEX_FIELD_WARN = new AllowableValue(WARN_VALUE, WARN_VALUE, "Provide a warning and do not include field in row sent to HBase.");
protected static final AllowableValue COMPLEX_FIELD_IGNORE = new AllowableValue(IGNORE_VALUE, IGNORE_VALUE, "Silently ignore and do not include in row sent to HBase.");
protected static final AllowableValue COMPLEX_FIELD_TEXT = new AllowableValue(TEXT_VALUE, TEXT_VALUE, "Use the string representation of the complex field as the value of the given column.");
static final PropertyDescriptor RECORD_READER_FACTORY = new PropertyDescriptor.Builder()
.name("record-reader")
.displayName("Record Reader")
.description("Specifies the Controller Service to use for parsing incoming data and determining the data's schema")
.identifiesControllerService(RecordReaderFactory.class)
.required(true)
.build();
protected static final PropertyDescriptor COMPLEX_FIELD_STRATEGY = new PropertyDescriptor.Builder()
.name("Complex Field Strategy")
.description("Indicates how to handle complex fields, i.e. fields that do not have a single text value.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.required(true)
.allowableValues(COMPLEX_FIELD_FAIL, COMPLEX_FIELD_WARN, COMPLEX_FIELD_IGNORE, COMPLEX_FIELD_TEXT)
.defaultValue(COMPLEX_FIELD_TEXT.getValue())
.build();
protected static final AllowableValue FIELD_ENCODING_STRING = new AllowableValue(STRING_ENCODING_VALUE, STRING_ENCODING_VALUE,
"Stores the value of each field as a UTF-8 String.");
protected static final AllowableValue FIELD_ENCODING_BYTES = new AllowableValue(BYTES_ENCODING_VALUE, BYTES_ENCODING_VALUE,
"Stores the value of each field as the byte representation of the type derived from the record.");
protected static final PropertyDescriptor FIELD_ENCODING_STRATEGY = new PropertyDescriptor.Builder()
.name("Field Encoding Strategy")
.description(("Indicates how to store the value of each field in HBase. The default behavior is to convert each value from the " +
"record to a String, and store the UTF-8 bytes. Choosing Bytes will interpret the type of each field from " +
"the record, and convert the value to the byte representation of that type, meaning an integer will be stored as the " +
"byte representation of that integer."))
.required(true)
.allowableValues(FIELD_ENCODING_STRING, FIELD_ENCODING_BYTES)
.defaultValue(FIELD_ENCODING_STRING.getValue())
.build();
protected static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
.name("Batch Size")
.description("The maximum number of records to be sent to HBase at any one time from the record set.")
.required(true)
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.defaultValue("1000")
.build();
protected static final AllowableValue NULL_FIELD_EMPTY = new AllowableValue("empty-bytes", "Empty Bytes",
"Use empty bytes. This can be used to overwrite existing fields or to put an empty placeholder value if you want" +
" every field to be present even if it has a null value.");
protected static final AllowableValue NULL_FIELD_SKIP = new AllowableValue("skip-field", "Skip Field", "Skip the field (don't process it at all).");
protected static final PropertyDescriptor NULL_FIELD_STRATEGY = new PropertyDescriptor.Builder()
.name("hbase-record-null-field-strategy")
.displayName("Null Field Strategy")
.required(true)
.defaultValue("skip-field")
.description("Handle null field values as either an empty string or skip them altogether.")
.allowableValues(NULL_FIELD_EMPTY, NULL_FIELD_SKIP)
.build();
protected static final PropertyDescriptor VISIBILITY_RECORD_PATH = new PropertyDescriptor.Builder()
.name("put-hb-rec-visibility-record-path")
.displayName("Visibility String Record Path Root")
.description("A record path that points to part of the record which contains a path to a mapping of visibility strings to record paths")
.required(false)
.addValidator(Validator.VALID)
.build();
protected RecordPathCache recordPathCache;
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(RECORD_READER_FACTORY);
properties.add(HBASE_CLIENT_SERVICE);
properties.add(TABLE_NAME);
properties.add(ROW_FIELD_NAME);
properties.add(ROW_ID_ENCODING_STRATEGY);
properties.add(NULL_FIELD_STRATEGY);
properties.add(COLUMN_FAMILY);
properties.add(DEFAULT_VISIBILITY_STRING);
properties.add(VISIBILITY_RECORD_PATH);
properties.add(TIMESTAMP_FIELD_NAME);
properties.add(BATCH_SIZE);
properties.add(COMPLEX_FIELD_STRATEGY);
properties.add(FIELD_ENCODING_STRATEGY);
return properties;
}
@Override
public Set<Relationship> getRelationships() {
final Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_FAILURE);
return rels;
}
private int addBatch(String tableName, List<PutFlowFile> flowFiles) throws IOException {
int columns = 0;
clientService.put(tableName, flowFiles);
for (PutFlowFile put : flowFiles) {
columns += put.getColumns().size();
}
return columns;
}
@Override
@OnScheduled
public void onScheduled(final ProcessContext context) {
recordPathCache = new RecordPathCache(4);
super.onScheduled(context);
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY)
.asControllerService(RecordReaderFactory.class);
List<PutFlowFile> flowFiles = new ArrayList<>();
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
final String recordPathText = context.getProperty(VISIBILITY_RECORD_PATH).getValue();
RecordPath recordPath = null;
if (recordPathCache != null && !StringUtils.isEmpty(recordPathText)) {
recordPath = recordPathCache.getCompiled(recordPathText);
}
final long start = System.nanoTime();
int index = 0;
int columns = 0;
boolean failed = false;
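// a restart.index attribute from a previous failed attempt records how many records already made it into HBase, so they can be skipped on replay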
String startIndexStr = flowFile.getAttribute("restart.index");
int startIndex = -1;
if (startIndexStr != null) {
startIndex = Integer.parseInt(startIndexStr);
}
PutFlowFile last = null;
try (final InputStream in = session.read(flowFile);
final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
Record record;
if (startIndex >= 0) {
while (index++ < startIndex && (reader.nextRecord()) != null) { }
}
while ((record = reader.nextRecord()) != null) {
PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), recordPath, flowFile, rowFieldName, columnFamily,
timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
if (putFlowFile.getColumns().size() == 0) {
continue;
}
flowFiles.add(putFlowFile);
index++;
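// once a full batch has accumulated, send it to HBase and start a new batch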
if (flowFiles.size() == batchSize) {
columns += addBatch(tableName, flowFiles);
last = flowFiles.get(flowFiles.size() - 1);
flowFiles = new ArrayList<>();
}
}
if (flowFiles.size() > 0) {
columns += addBatch(tableName, flowFiles);
last = flowFiles.get(flowFiles.size() - 1);
}
} catch (Exception ex) {
getLogger().error("Failed to put records to HBase.", ex);
failed = true;
}
if (!failed) {
if (columns > 0) {
sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
}
flowFile = session.removeAttribute(flowFile, "restart.index");
session.transfer(flowFile, REL_SUCCESS);
} else {
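// restart.index points at the first record of the batch that failed, so a subsequent attempt can skip the records that were already sent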
String restartIndex = Integer.toString(index - flowFiles.size());
flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
if (columns > 0) {
sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
}
flowFile = session.penalize(flowFile);
session.transfer(flowFile, REL_FAILURE);
}
}
private void sendProvenance(ProcessSession session, FlowFile flowFile, int columns, long time, PutFlowFile pff) {
final String details = String.format("Put %d cells to HBase.", columns);
session.getProvenanceReporter().send(flowFile, getTransitUri(pff), details, time);
}
@Override
protected PutFlowFile createPut(ProcessSession session, ProcessContext context, FlowFile flowFile) {
return null;
}
protected byte[] asBytes(String field, RecordFieldType fieldType, Record record, boolean asString, String complexFieldStrategy) throws PutCreationFailedInvokedException {
byte[] retVal;
if (asString) {
switch (fieldType) {
case RECORD:
case CHOICE:
case ARRAY:
case MAP:
retVal = handleComplexField(record, field, complexFieldStrategy);
break;
default:
final String value = record.getAsString(field);
retVal = clientService.toBytes(value);
break;
}
} else {
switch (fieldType) {
case RECORD:
case CHOICE:
case ARRAY:
case MAP:
retVal = handleComplexField(record, field, complexFieldStrategy);
break;
case BOOLEAN:
retVal = clientService.toBytes(record.getAsBoolean(field));
break;
case DOUBLE:
retVal = clientService.toBytes(record.getAsDouble(field));
break;
case FLOAT:
retVal = clientService.toBytes(record.getAsFloat(field));
break;
case INT:
retVal = clientService.toBytes(record.getAsInt(field));
break;
case LONG:
retVal = clientService.toBytes(record.getAsLong(field));
break;
default:
final String value = record.getAsString(field);
retVal = clientService.toBytes(value);
break;
}
}
return retVal;
}
private byte[] handleComplexField(Record record, String field, String complexFieldStrategy) throws PutCreationFailedInvokedException {
switch (complexFieldStrategy) {
case FAIL_VALUE:
getLogger().error("Complex value found for {}; routing to failure", field);
throw new PutCreationFailedInvokedException(String.format("Complex value found for %s; routing to failure", field));
case WARN_VALUE:
getLogger().warn("Complex value found for {}; skipping", field);
return null;
case TEXT_VALUE:
final String value = record.getAsString(field);
return clientService.toBytes(value);
case IGNORE_VALUE:
// silently skip
return null;
default:
return null;
}
}
static final byte[] EMPTY = "".getBytes();
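// placeholder value written for null fields when the Null Field Strategy is "Empty Bytes"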
protected PutFlowFile createPut(ProcessContext context, Record record, RecordSchema schema, RecordPath recordPath, FlowFile flowFile, String rowFieldName,
String columnFamily, String timestampFieldName, String fieldEncodingStrategy, String rowEncodingStrategy,
String complexFieldStrategy)
throws PutCreationFailedInvokedException {
PutFlowFile retVal = null;
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
final String nullStrategy = context.getProperty(NULL_FIELD_STRATEGY).getValue();
final String defaultVisibility = context.getProperty(DEFAULT_VISIBILITY_STRING).evaluateAttributeExpressions(flowFile).getValue();
boolean asString = STRING_ENCODING_VALUE.equals(fieldEncodingStrategy);
final byte[] fam = clientService.toBytes(columnFamily);
if (record != null) {
final Long timestamp;
if (!StringUtils.isBlank(timestampFieldName)) {
try {
timestamp = record.getAsLong(timestampFieldName);
} catch (IllegalTypeConversionException e) {
throw new PutCreationFailedInvokedException("Could not convert " + timestampFieldName + " to a long", e);
}
if (timestamp == null) {
getLogger().warn("The value of timestamp field {} was null, record will be inserted with latest timestamp", timestampFieldName);
}
} else {
timestamp = null;
}
RecordField visField = null;
Map visSettings = null;
if (recordPath != null) {
final RecordPathResult result = recordPath.evaluate(record);
FieldValue fv = result.getSelectedFields().findFirst().get();
visField = fv.getField();
visSettings = (Map) fv.getValue();
}
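// when a visibility record path is configured, visSettings maps field names to the visibility string to apply to that field's cell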
List<PutColumn> columns = new ArrayList<>();
for (String name : schema.getFieldNames()) {
if (name.equals(rowFieldName) || name.equals(timestampFieldName) || (visField != null && name.equals(visField.getFieldName()))) {
continue;
}
Object val = record.getValue(name);
final byte[] fieldValueBytes;
if (val == null && nullStrategy.equals(NULL_FIELD_SKIP.getValue())) {
continue;
} else if (val == null && nullStrategy.equals(NULL_FIELD_EMPTY.getValue())) {
fieldValueBytes = EMPTY;
} else {
fieldValueBytes = asBytes(name, schema.getField(name).get().getDataType().getFieldType(), record, asString, complexFieldStrategy);
}
if (fieldValueBytes != null) {
String visString = (visField != null && visSettings != null && visSettings.containsKey(name))
? (String) visSettings.get(name) : defaultVisibility;
//TODO: factor this into future enhancements to how complex records are handled.
if (StringUtils.isBlank(visString)) {
visString = VisibilityUtil.pickVisibilityString(columnFamily, name, flowFile, context);
}
PutColumn column = !StringUtils.isEmpty(visString)
? new PutColumn(fam, clientService.toBytes(name), fieldValueBytes, timestamp, visString)
: new PutColumn(fam, clientService.toBytes(name), fieldValueBytes, timestamp);
columns.add(column);
}
}
String rowIdValue = record.getAsString(rowFieldName);
if (rowIdValue == null) {
throw new PutCreationFailedInvokedException(String.format("Row ID was null for flowfile with ID %s", flowFile.getAttribute("uuid")));
}
byte[] rowId = getRow(rowIdValue, rowEncodingStrategy);
retVal = new PutFlowFile(tableName, rowId, columns, flowFile);
}
return retVal;
}
static class PutCreationFailedInvokedException extends Exception {
PutCreationFailedInvokedException(String msg) {
super(msg);
}
PutCreationFailedInvokedException(String msg, Exception e) {
super(msg, e);
}
}
}

View File

@ -1,601 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.hbase.io.JsonFullRowSerializer;
import org.apache.nifi.hbase.io.JsonQualifierAndValueRowSerializer;
import org.apache.nifi.hbase.io.RowSerializer;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"hbase", "scan", "fetch", "get"})
@CapabilityDescription("Scans and fetches rows from an HBase table. This processor may be used to fetch rows from hbase table by specifying a range of rowkey values (start and/or end ),"
+ "by time range, by filter expression, or any combination of them. "
+ "Order of records can be controlled by a property Reversed"
+ "Number of rows retrieved by the processor can be limited.")
@WritesAttributes({
@WritesAttribute(attribute = "hbase.table", description = "The name of the HBase table that the row was fetched from"),
@WritesAttribute(attribute = "mime.type", description = "Set to application/json when using a Destination of flowfile-content, not set or modified otherwise"),
@WritesAttribute(attribute = "hbase.rows.count", description = "Number of rows in the content of given flow file"),
@WritesAttribute(attribute = "scanhbase.results.found", description = "Indicates whether at least one row has been found in given hbase table with provided conditions. "
+ "Could be null (not present) if transfered to FAILURE")
})
public class ScanHBase extends AbstractProcessor implements VisibilityFetchSupport {
//enhanced regex for columns to allow "-" in column qualifier names
static final Pattern COLUMNS_PATTERN = Pattern.compile("\\w+(:(\\w|-)+)?(?:,\\w+(:(\\w|-)+)?)*");
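// e.g. "fam1:qual-1,fam2" is valid: a qualifier may contain dashes, and may be omitted to select an entire family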
static final String nl = System.lineSeparator();
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.displayName("HBase Client Service")
.name("scanhbase-client-service")
.description("Specifies the Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.displayName("Table Name")
.name("scanhbase-table-name")
.description("The name of the HBase Table to fetch from.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor START_ROW = new PropertyDescriptor.Builder()
.displayName("Start rowkey")
.name("scanhbase-start-rowkey")
.description("The rowkey to start scan from.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor END_ROW = new PropertyDescriptor.Builder()
.displayName("End rowkey")
.name("scanhbase-end-rowkey")
.description("The row key to end scan by.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor TIME_RANGE_MIN = new PropertyDescriptor.Builder()
.displayName("Time range min")
.name("scanhbase-time-range-min")
.description("Time range min value. Both min and max values for time range should be either blank or provided.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.LONG_VALIDATOR)
.build();
static final PropertyDescriptor TIME_RANGE_MAX = new PropertyDescriptor.Builder()
.displayName("Time range max")
.name("scanhbase-time-range-max")
.description("Time range max value. Both min and max values for time range should be either blank or provided.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.LONG_VALIDATOR)
.build();
static final PropertyDescriptor LIMIT_ROWS = new PropertyDescriptor.Builder()
.displayName("Limit rows")
.name("scanhbase-limit")
.description("Limit number of rows retrieved by scan.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.INTEGER_VALIDATOR)
.build();
static final PropertyDescriptor BULK_SIZE = new PropertyDescriptor.Builder()
.displayName("Max rows per flow file")
.name("scanhbase-bulk-size")
.description("Limits number of rows in single flow file content. Set to 0 to avoid multiple flow files.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.defaultValue("0")
.addValidator(StandardValidators.INTEGER_VALIDATOR)
.build();
static final PropertyDescriptor REVERSED_SCAN = new PropertyDescriptor.Builder()
.displayName("Reversed order")
.name("scanhbase-reversed-order")
.description("Set whether this scan is a reversed one. This is false by default which means forward(normal) scan.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues("true", "false")
.required(false)
.defaultValue("false")
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.build();
static final PropertyDescriptor FILTER_EXPRESSION = new PropertyDescriptor.Builder()
.displayName("Filter expression")
.name("scanhbase-filter-expression")
.description("An HBase filter expression that will be applied to the scan. This property can not be used when also using the Columns property. "
+ "Example: \"ValueFilter( =, 'binaryprefix:commit' )\"")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static final PropertyDescriptor COLUMNS = new PropertyDescriptor.Builder()
.displayName("Columns")
.name("scanhbase-columns")
.description("An optional comma-separated list of \"<colFamily>:<colQualifier>\" pairs to fetch. To return all columns " +
"for a given family, leave off the qualifier such as \"<colFamily1>,<colFamily2>\".")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(StandardValidators.createRegexMatchingValidator(COLUMNS_PATTERN))
.build();
static final AllowableValue JSON_FORMAT_FULL_ROW = new AllowableValue("full-row", "full-row",
"Creates a JSON document with the format: {\"row\":<row-id>, \"cells\":[{\"fam\":<col-fam>, \"qual\":<col-val>, \"val\":<value>, \"ts\":<timestamp>}]}.");
static final AllowableValue JSON_FORMAT_QUALIFIER_AND_VALUE = new AllowableValue("col-qual-and-val", "col-qual-and-val",
"Creates a JSON document with the format: {\"<col-qual>\":\"<value>\", \"<col-qual>\":\"<value>\".");
static final PropertyDescriptor JSON_FORMAT = new PropertyDescriptor.Builder()
.displayName("JSON Format")
.name("scanhbase-json-format")
.description("Specifies how to represent the HBase row as a JSON document.")
.required(true)
.allowableValues(JSON_FORMAT_FULL_ROW, JSON_FORMAT_QUALIFIER_AND_VALUE)
.defaultValue(JSON_FORMAT_FULL_ROW.getValue())
.build();
static final PropertyDescriptor DECODE_CHARSET = new PropertyDescriptor.Builder()
.displayName("Decode Character Set")
.name("scanhbase-decode-charset")
.description("The character set used to decode data from HBase.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final PropertyDescriptor ENCODE_CHARSET = new PropertyDescriptor.Builder()
.displayName("Encode Character Set")
.name("scanhbase-encode-charset")
.description("The character set used to encode the JSON representation of the row.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final PropertyDescriptor BLOCK_CACHE = new PropertyDescriptor.Builder()
.displayName("Block Cache")
.name("block-cache")
.description("The Block Cache to enable/disable block cache on HBase scan.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues("true", "false")
.required(true)
.defaultValue("true")
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.build();
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
.name("original")
.description("The original input file will be routed to this destination, even if no rows are retrieved based on provided conditions.")
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All successful fetches are routed to this relationship.")
.build();
static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("All failed fetches are routed to this relationship.")
.build();
static final String HBASE_TABLE_ATTR = "hbase.table";
static final String HBASE_ROWS_COUNT_ATTR = "hbase.rows.count";
static final List<PropertyDescriptor> properties;
static {
List<PropertyDescriptor> props = new ArrayList<>();
props.add(HBASE_CLIENT_SERVICE);
props.add(TABLE_NAME);
props.add(AUTHORIZATIONS);
props.add(START_ROW);
props.add(END_ROW);
props.add(TIME_RANGE_MIN);
props.add(TIME_RANGE_MAX);
props.add(LIMIT_ROWS);
props.add(REVERSED_SCAN);
props.add(BULK_SIZE);
props.add(FILTER_EXPRESSION);
props.add(COLUMNS);
props.add(JSON_FORMAT);
props.add(ENCODE_CHARSET);
props.add(DECODE_CHARSET);
props.add(BLOCK_CACHE);
properties = Collections.unmodifiableList(props);
}
static final Set<Relationship> relationships;
static {
Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_ORIGINAL);
rels.add(REL_FAILURE);
relationships = Collections.unmodifiableSet(rels);
}
private volatile Charset decodeCharset;
private volatile Charset encodeCharset;
private RowSerializer serializer = null;
@OnScheduled
public void onScheduled(ProcessContext context) {
this.decodeCharset = Charset.forName(context.getProperty(DECODE_CHARSET).getValue());
this.encodeCharset = Charset.forName(context.getProperty(ENCODE_CHARSET).getValue());
final String jsonFormat = context.getProperty(JSON_FORMAT).getValue();
if (jsonFormat.equals(JSON_FORMAT_FULL_ROW.getValue())) {
this.serializer = new JsonFullRowSerializer(decodeCharset, encodeCharset);
} else {
this.serializer = new JsonQualifierAndValueRowSerializer(decodeCharset, encodeCharset);
}
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return properties;
}
@Override
public Set<Relationship> getRelationships() {
return relationships;
}
@Override
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
final List<ValidationResult> problems = new ArrayList<>();
final String columns = validationContext.getProperty(COLUMNS).getValue();
final String filter = validationContext.getProperty(FILTER_EXPRESSION).getValue();
if (!StringUtils.isBlank(columns) && !StringUtils.isBlank(filter)) {
problems.add(new ValidationResult.Builder()
.subject(FILTER_EXPRESSION.getDisplayName())
.input(filter).valid(false)
.explanation("A filter expression can not be used in conjunction with the Columns property")
.build());
}
String minTS = validationContext.getProperty(TIME_RANGE_MIN).getValue();
String maxTS = validationContext.getProperty(TIME_RANGE_MAX).getValue();
if ( (!StringUtils.isBlank(minTS) && StringUtils.isBlank(maxTS)) || (StringUtils.isBlank(minTS) && !StringUtils.isBlank(maxTS))) {
problems.add(new ValidationResult.Builder()
.subject(TIME_RANGE_MAX.getDisplayName())
.input(maxTS).valid(false)
.explanation(String.format("%s and %s both should be either empty or provided", TIME_RANGE_MIN, TIME_RANGE_MAX))
.build());
}
return problems;
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
try {
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
if (StringUtils.isBlank(tableName)) {
getLogger().error("Table Name is blank or null for {}, transferring to failure", flowFile);
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
final List<String> authorizations = getAuthorizations(context, flowFile);
final String startRow = context.getProperty(START_ROW).evaluateAttributeExpressions(flowFile).getValue();
final String endRow = context.getProperty(END_ROW).evaluateAttributeExpressions(flowFile).getValue();
final String filterExpression = context.getProperty(FILTER_EXPRESSION).evaluateAttributeExpressions(flowFile).getValue();
//evaluate and validate time range min and max values. They both should be either empty or provided.
Long timerangeMin = null;
Long timerangeMax = null;
try {
timerangeMin = context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).asLong();
} catch (Exception e) {
getLogger().error("Time range min value is not a number ({}) for {}, transferring to failure",
new Object[] {context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).getValue(), flowFile});
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
try {
timerangeMax = context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).asLong();
} catch (Exception e) {
getLogger().error("Time range max value is not a number ({}) for {}, transferring to failure",
new Object[] {context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).getValue(), flowFile});
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
if (timerangeMin == null && timerangeMax != null) {
getLogger().error("Time range min value cannot be blank when max value provided for {}, transferring to failure", flowFile);
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
} else if (timerangeMin != null && timerangeMax == null) {
getLogger().error("Time range max value cannot be blank when min value provided for {}, transferring to failure", flowFile);
session.transfer(session.penalize(flowFile), REL_FAILURE);
return;
}
final Integer limitRows = context.getProperty(LIMIT_ROWS).evaluateAttributeExpressions(flowFile).asInteger();
final Boolean isReversed = context.getProperty(REVERSED_SCAN).asBoolean();
final Boolean blockCache = context.getProperty(BLOCK_CACHE).asBoolean();
final Integer bulkSize = context.getProperty(BULK_SIZE).evaluateAttributeExpressions(flowFile).asInteger();
final List<Column> columns = getColumns(context.getProperty(COLUMNS).evaluateAttributeExpressions(flowFile).getValue());
final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
final AtomicReference<Long> ffCountHolder = new AtomicReference<>(0L);
ScanHBaseResultHandler handler = new ScanHBaseResultHandler(context, session, flowFile, rowsPulledHolder, ffCountHolder, hBaseClientService, tableName, bulkSize);
try {
hBaseClientService.scan(tableName,
startRow, endRow,
filterExpression,
timerangeMin, timerangeMax,
limitRows,
isReversed,
blockCache,
columns,
authorizations,
handler);
} catch (Exception e) {
if (handler.getFlowFile() != null) {
session.remove(handler.getFlowFile());
}
getLogger().error("Unable to fetch rows from HBase table {}", tableName, e);
flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
session.transfer(flowFile, REL_FAILURE);
return;
}
flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
FlowFile openedFF = handler.getFlowFile();
if (openedFF != null) {
finalizeFlowFile(session, hBaseClientService, openedFF, tableName, handler.getRecordsCount(), null);
}
session.transfer(flowFile, REL_ORIGINAL);
} catch (final Exception e) {
getLogger().error("Failed to receive data from HBase due to {}", e);
session.rollback();
// if we failed, we want to yield so that we don't hammer hbase.
context.yield();
}
}
/*
* Initiates FF content, adds relevant attributes, and starts content with JSON array "["
*/
private FlowFile initNewFlowFile(final ProcessSession session, final FlowFile origFF, final String tableName) throws IOException {
FlowFile flowFile = session.create(origFF);
flowFile = session.putAttribute(flowFile, HBASE_TABLE_ATTR, tableName);
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
final AtomicReference<IOException> ioe = new AtomicReference<>(null);
flowFile = session.write(flowFile, (out) -> {
try {
out.write("[".getBytes());
} catch (IOException e) {
ioe.set(e);
}
});
if (ioe.get() != null) {
throw ioe.get();
}
return flowFile;
}
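// Closes the JSON array opened by initNewFlowFile, sets the row count attribute, and routes the FlowFile to success or failure.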
private void finalizeFlowFile(final ProcessSession session, final HBaseClientService hBaseClientService,
FlowFile flowFile, final String tableName, Long rowsPulled, Exception e) {
Relationship rel = REL_SUCCESS;
flowFile = session.putAttribute(flowFile, HBASE_ROWS_COUNT_ATTR, rowsPulled.toString());
final AtomicReference<IOException> ioe = new AtomicReference<>(null);
flowFile = session.append(flowFile, (out) -> {
try {
out.write("]".getBytes());
} catch (IOException ei) {
ioe.set(ei);
}
});
if (e != null || ioe.get() != null) {
flowFile = session.putAttribute(flowFile, "scanhbase.error", (e == null ? e : ioe.get()).toString());
rel = REL_FAILURE;
} else {
session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, "{ids}"));
}
session.transfer(flowFile, rel);
}
/**
* @param columnsValue a String in the form colFam:colQual,colFam:colQual
* @return a list of Columns based on parsing the given String
*/
private List<Column> getColumns(final String columnsValue) {
final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
List<Column> columnsList = new ArrayList<>(columns.length);
for (final String column : columns) {
if (column.contains(":")) {
final String[] parts = column.split(":");
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, cq));
} else {
final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, null));
}
}
return columnsList;
}
/**
* @return number of rows to be committed to session.
*/
protected int getBatchSize() {
return 500;
}
/**
* Result Handler for Scan operation
*/
private class ScanHBaseResultHandler implements ResultHandler {
final private ProcessSession session;
final private FlowFile origFF;
final private AtomicReference<Long> rowsPulledHolder;
final private AtomicReference<Long> ffCountHolder;
final private HBaseClientService hBaseClientService;
final private String tableName;
final private Integer bulkSize;
private FlowFile flowFile = null;
private byte[] JSON_ARRAY_DELIM = ",\n".getBytes();
private boolean handledAny = false;
ScanHBaseResultHandler(final ProcessContext context, final ProcessSession session,
final FlowFile origFF, final AtomicReference<Long> rowsPulledHolder, final AtomicReference<Long> ffCountHolder,
final HBaseClientService hBaseClientService, final String tableName, final Integer bulkSize) {
this.session = session;
this.rowsPulledHolder = rowsPulledHolder;
this.ffCountHolder = ffCountHolder;
this.hBaseClientService = hBaseClientService;
this.tableName = tableName;
this.bulkSize = bulkSize == null ? 0 : bulkSize;
this.origFF = origFF;
}
@Override
public void handle(final byte[] rowKey, final ResultCell[] resultCells) {
long rowsPulled = rowsPulledHolder.get();
long ffUncommittedCount = ffCountHolder.get();
try {
if (flowFile == null) {
flowFile = initNewFlowFile(session, origFF, tableName);
ffUncommittedCount++;
}
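// append this row's JSON to the open FlowFile, separating rows with a comma so the content remains a valid JSON array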
flowFile = session.append(flowFile, (out) -> {
if (rowsPulledHolder.get() > 0) {
out.write(JSON_ARRAY_DELIM);
}
serializer.serialize(rowKey, resultCells, out);
});
handledAny = true;
} catch (Exception e) {
throw new RuntimeException(e);
}
rowsPulled++;
// bulkSize controls number of records per flow file.
if (bulkSize > 0 && rowsPulled >= bulkSize) {
finalizeFlowFile(session, hBaseClientService, flowFile, tableName, rowsPulled, null);
flowFile = null;
rowsPulledHolder.set(0L);
// we could potentially have a huge number of rows. If we get to batchSize, go ahead and commit the
// session so that we can avoid buffering tons of FlowFiles without ever sending any out.
if (getBatchSize() > 0 && ffUncommittedCount * bulkSize > getBatchSize()) {
session.commitAsync(() -> {
ffCountHolder.set(0L);
});
} else {
ffCountHolder.set(ffUncommittedCount);
}
} else {
rowsPulledHolder.set(rowsPulled);
}
}
public boolean isHandledAny() {
return handledAny;
}
public FlowFile getFlowFile() {
return flowFile;
}
public long getRecordsCount() {
return rowsPulledHolder.get();
}
}
}

View File

@ -1,54 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.util.StringUtils;
import java.util.ArrayList;
import java.util.List;
public interface VisibilityFetchSupport {
PropertyDescriptor AUTHORIZATIONS = new PropertyDescriptor.Builder()
.name("hbase-fetch-row-authorizations")
.displayName("Authorizations")
.description("The list of authorizations to pass to the scanner. This will be ignored if cell visibility labels are not in use.")
.required(false)
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
.addValidator(Validator.VALID)
.build();
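// Splits the comma-separated Authorizations property (after expression evaluation) into a trimmed list; returns an empty list when blank or unset.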
default List<String> getAuthorizations(ProcessContext context, FlowFile flowFile) {
final String authorizationString = context.getProperty(AUTHORIZATIONS).isSet()
? context.getProperty(AUTHORIZATIONS).evaluateAttributeExpressions(flowFile).getValue().trim()
: "";
List<String> authorizations = new ArrayList<>();
if (!StringUtils.isBlank(authorizationString)) {
String[] parts = authorizationString.split(",");
for (String part : parts) {
authorizations.add(part.trim());
}
}
return authorizations;
}
}

View File

@ -1,134 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.util.RowSerializerUtil;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
/**
* Serializes a row from HBase to a JSON document of the form:
*
* {
* "row" : "row1",
* "cells": [
* {
* "family" : "fam1",
* "qualifier" : "qual1"
* "value" : "val1"
* "timestamp" : 123456789
* },
* {
* "family" : "fam1",
* "qualifier" : "qual2"
* "value" : "val2"
* "timestamp" : 123456789
* }
* ]
* }
*
* If base64encode is true, the row id, family, qualifier, and value will be represented as base 64 encoded strings.
*/
public class JsonFullRowSerializer implements RowSerializer {
private final Charset decodeCharset;
private final Charset encodeCharset;
private final boolean base64encode;
public JsonFullRowSerializer(final Charset decodeCharset, final Charset encodeCharset) {
this(decodeCharset, encodeCharset, false);
}
public JsonFullRowSerializer(final Charset decodeCharset, final Charset encodeCharset, final boolean base64encode) {
this.decodeCharset = decodeCharset;
this.encodeCharset = encodeCharset;
this.base64encode = base64encode;
}
@Override
public String serialize(byte[] rowKey, ResultCell[] cells) {
final String rowId = RowSerializerUtil.getRowId(rowKey, decodeCharset, base64encode);
final StringBuilder jsonBuilder = new StringBuilder();
jsonBuilder.append("{");
jsonBuilder.append("\"row\":");
appendString(jsonBuilder, rowId, base64encode);
jsonBuilder.append(", \"cells\": [");
int i = 0;
for (final ResultCell cell : cells) {
final String cellFamily = RowSerializerUtil.getCellFamily(cell, decodeCharset, base64encode);
final String cellQualifier = RowSerializerUtil.getCellQualifier(cell, decodeCharset, base64encode);
final String cellValue = RowSerializerUtil.getCellValue(cell, decodeCharset, base64encode);
if (i > 0) {
jsonBuilder.append(", ");
}
// start cell
jsonBuilder.append("{");
jsonBuilder.append("\"fam\":");
appendString(jsonBuilder, cellFamily, base64encode);
jsonBuilder.append(",\"qual\":");
appendString(jsonBuilder, cellQualifier, base64encode);
jsonBuilder.append(",\"val\":");
appendString(jsonBuilder, cellValue, base64encode);
jsonBuilder.append(",\"ts\":");
jsonBuilder.append(String.valueOf(cell.getTimestamp()));
// end cell
jsonBuilder.append("}");
i++;
}
// end cell array
jsonBuilder.append("]");
// end overall document
jsonBuilder.append("}");
return jsonBuilder.toString();
}
@Override
public void serialize(final byte[] rowKey, final ResultCell[] cells, final OutputStream out) throws IOException {
final String json = serialize(rowKey, cells);
out.write(json.getBytes(encodeCharset));
}
private void appendString(final StringBuilder jsonBuilder, final String str, final boolean base64encode) {
jsonBuilder.append("\"");
// only escape the value when not doing base64
if (!base64encode) {
jsonBuilder.append(StringEscapeUtils.escapeJson(str));
} else {
jsonBuilder.append(str);
}
jsonBuilder.append("\"");
}
}

View File

@ -1,98 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.util.RowSerializerUtil;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
/**
* Serializes an HBase row to a JSON document of the form:
*
* {
* "qual1" : "val1",
* "qual2" : "val2"
* }
*
* If base64encode is true, the qualifiers and values will be represented as base 64 encoded strings.
*/
public class JsonQualifierAndValueRowSerializer implements RowSerializer {
private final Charset decodeCharset;
private final Charset encodeCharset;
private final boolean base64encode;
public JsonQualifierAndValueRowSerializer(final Charset decodeCharset, final Charset encodeCharset) {
this(decodeCharset, encodeCharset, false);
}
public JsonQualifierAndValueRowSerializer(final Charset decodeCharset, final Charset encodeCharset, final boolean base64encode) {
this.decodeCharset = decodeCharset;
this.encodeCharset = encodeCharset;
this.base64encode = base64encode;
}
@Override
public String serialize(byte[] rowKey, ResultCell[] cells) {
final StringBuilder jsonBuilder = new StringBuilder();
jsonBuilder.append("{");
int i = 0;
for (final ResultCell cell : cells) {
final String cellQualifier = RowSerializerUtil.getCellQualifier(cell, decodeCharset, base64encode);
final String cellValue = RowSerializerUtil.getCellValue(cell, decodeCharset, base64encode);
if (i > 0) {
jsonBuilder.append(", ");
}
appendString(jsonBuilder, cellQualifier, base64encode);
jsonBuilder.append(":");
appendString(jsonBuilder, cellValue, base64encode);
i++;
}
jsonBuilder.append("}");
return jsonBuilder.toString();
}
@Override
public void serialize(final byte[] rowKey, final ResultCell[] cells, final OutputStream out) throws IOException {
final String json = serialize(rowKey, cells);
out.write(json.getBytes(encodeCharset));
}
private void appendString(final StringBuilder jsonBuilder, final String str, final boolean base64encode) {
jsonBuilder.append("\"");
// only escape the value when not doing base64
if (!base64encode) {
jsonBuilder.append(StringEscapeUtils.escapeJson(str));
} else {
jsonBuilder.append(str);
}
jsonBuilder.append("\"");
}
}

View File

@ -1,86 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.nifi.hbase.scan.ResultCell;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
/**
* Serializes a row from HBase to a JSON document of the form:
*
* {
* "row" : "row1",
* "cells": {
* "fam1:qual1" : "val1",
* "fam1:qual2" : "val2"
* }
* }
*
*/
public class JsonRowSerializer implements RowSerializer {
private final Charset charset;
public JsonRowSerializer(final Charset charset) {
this.charset = charset;
}
@Override
public String serialize(byte[] rowKey, ResultCell[] cells) {
final StringBuilder jsonBuilder = new StringBuilder();
jsonBuilder.append("{");
final String row = new String(rowKey, charset);
jsonBuilder.append("\"row\":")
.append("\"")
.append(StringEscapeUtils.escapeJson(row))
.append("\"");
jsonBuilder.append(", \"cells\": {");
int i = 0;
for (final ResultCell cell : cells) {
final String cellFamily = new String(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), charset);
final String cellQualifier = new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), charset);
if (i > 0) {
jsonBuilder.append(", ");
}
jsonBuilder.append("\"")
.append(StringEscapeUtils.escapeJson(cellFamily))
.append(":")
.append(StringEscapeUtils.escapeJson(cellQualifier))
.append("\":\"")
.append(StringEscapeUtils.escapeJson(new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength(), charset)))
.append("\"");
i++;
}
jsonBuilder.append("}}");
return jsonBuilder.toString();
}
@Override
public void serialize(final byte[] rowKey, final ResultCell[] cells, final OutputStream out) throws IOException {
final String json = serialize(rowKey, cells);
out.write(json.getBytes(charset));
}
}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.nifi.hbase.scan.ResultCell;
import java.io.IOException;
import java.io.OutputStream;
public interface RowSerializer {
/**
* Serializes the given row and cells to the provided OutputStream
*
* @param rowKey the row's key
* @param cells the cells to serialize
* @param out the OutputStream to serialize to
* @throws IOException if unable to serialize the row
*/
void serialize(byte[] rowKey, ResultCell[] cells, OutputStream out) throws IOException;
/**
*
* @param rowKey the row key of the row being serialized
* @param cells the cells of the row being serialized
* @return the serialized string representing the row
*/
String serialize(byte[] rowKey, ResultCell[] cells);
}

View File

@ -1,56 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.distributed.cache.client.exception.DeserializationException;
import org.apache.nifi.distributed.cache.client.exception.SerializationException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
public class ObjectSerDe implements Serializer<Object>, Deserializer<Object> {
@Override
public Object deserialize(byte[] input) throws DeserializationException, IOException {
if (input == null || input.length == 0) {
return null;
}
try (final ByteArrayInputStream in = new ByteArrayInputStream(input);
final ObjectInputStream objIn = new ObjectInputStream(in)) {
return objIn.readObject();
} catch (ClassNotFoundException e) {
throw new DeserializationException("Could not deserialize object due to ClassNotFoundException", e);
}
}
@Override
public void serialize(Object value, OutputStream output) throws SerializationException, IOException {
try (final ByteArrayOutputStream bOut = new ByteArrayOutputStream();
final ObjectOutputStream objOut = new ObjectOutputStream(bOut)) {
objOut.writeObject(value);
output.write(bOut.toByteArray());
}
}
}

View File

@ -1,96 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import org.apache.nifi.hbase.scan.ResultCell;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
public class RowSerializerUtil {
/**
* @param rowId the row id to get the string from
* @param charset the charset that was used to encode the cell's row
* @param base64encodeValues whether or not to base64 encode the returned string
*
* @return the String representation of the cell's row
*/
public static String getRowId(final byte[] rowId, final Charset charset, final boolean base64encodeValues) {
if (base64encodeValues) {
ByteBuffer cellRowBuffer = ByteBuffer.wrap(rowId);
ByteBuffer base64Buffer = Base64.getEncoder().encode(cellRowBuffer);
return new String(base64Buffer.array(), StandardCharsets.UTF_8);
} else {
return new String(rowId, charset);
}
}
/**
* @param cell the cell to get the family from
* @param charset the charset that was used to encode the cell's family
* @param base64encodeValues whether or not to base64 encode the returned string
*
* @return the String representation of the cell's family
*/
public static String getCellFamily(final ResultCell cell, final Charset charset, final boolean base64encodeValues) {
if (base64encodeValues) {
ByteBuffer cellFamilyBuffer = ByteBuffer.wrap(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
ByteBuffer base64Buffer = Base64.getEncoder().encode(cellFamilyBuffer);
return new String(base64Buffer.array(), StandardCharsets.UTF_8);
} else {
return new String(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), charset);
}
}
/**
* @param cell the cell to get the qualifier from
* @param charset the charset that was used to encode the cell's qualifier
* @param base64encodeValues whether or not to base64 encode the returned string
*
* @return the String representation of the cell's qualifier
*/
public static String getCellQualifier(final ResultCell cell, final Charset charset, final boolean base64encodeValues) {
if (base64encodeValues) {
ByteBuffer cellQualifierBuffer = ByteBuffer.wrap(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
ByteBuffer base64Buffer = Base64.getEncoder().encode(cellQualifierBuffer);
return new String(base64Buffer.array(), StandardCharsets.UTF_8);
} else {
return new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), charset);
}
}
/**
* @param cell the cell to get the value from
* @param charset the charset that was used to encode the cell's value
* @param base64encodeValues whether or not to base64 encode the returned string
*
* @return the String representation of the cell's value
*/
public static String getCellValue(final ResultCell cell, final Charset charset, final boolean base64encodeValues) {
if (base64encodeValues) {
ByteBuffer cellValueBuffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
ByteBuffer base64Buffer = Base64.getEncoder().encode(cellValueBuffer);
return new String(base64Buffer.array(), StandardCharsets.UTF_8);
} else {
return new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength(), charset);
}
}
}

View File

@ -1,44 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.distributed.cache.client.exception.DeserializationException;
import org.apache.nifi.distributed.cache.client.exception.SerializationException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
public class StringSerDe implements Serializer<String>, Deserializer<String> {
@Override
public String deserialize(final byte[] value) throws DeserializationException, IOException {
if ( value == null ) {
return null;
}
return new String(value, StandardCharsets.UTF_8);
}
@Override
public void serialize(final String value, final OutputStream out) throws SerializationException, IOException {
out.write(value.getBytes(StandardCharsets.UTF_8));
}
}

View File

@ -1,51 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
public class VisibilityUtil {
public static String pickVisibilityString(String columnFamily, String columnQualifier, FlowFile flowFile, ProcessContext context) {
// A visibility label can only be resolved when a column family is present.
if (StringUtils.isBlank(columnFamily)) {
return null;
}
// First check for a flowfile attribute scoped to the family and qualifier,
// e.g. visibility.<family>.<qualifier> (or visibility.<family> when no qualifier is supplied).
String lookupKey = String.format("visibility.%s%s%s", columnFamily, !StringUtils.isBlank(columnQualifier) ? "." : "", columnQualifier);
String fromAttribute = flowFile.getAttribute(lookupKey);
// Fall back to the family-level attribute when no qualifier-level attribute exists.
if (fromAttribute == null && !StringUtils.isBlank(columnQualifier)) {
String lookupKeyFam = String.format("visibility.%s", columnFamily);
fromAttribute = flowFile.getAttribute(lookupKeyFam);
}
if (fromAttribute != null) {
return fromAttribute;
} else {
// No attribute matched: consult the processor's dynamic properties,
// again preferring the qualifier-level property over the family-level one.
PropertyValue descriptor = context.getProperty(lookupKey);
if (descriptor == null || !descriptor.isSet()) {
descriptor = context.getProperty(String.format("visibility.%s", columnFamily));
}
return descriptor != null ? descriptor.evaluateAttributeExpressions(flowFile).getValue() : null;
}
}
}

View File

@ -1,24 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.hbase.DeleteHBaseCells
org.apache.nifi.hbase.DeleteHBaseRow
org.apache.nifi.hbase.GetHBase
org.apache.nifi.hbase.ListHBaseRegions
org.apache.nifi.hbase.PutHBaseCell
org.apache.nifi.hbase.PutHBaseJSON
org.apache.nifi.hbase.PutHBaseRecord
org.apache.nifi.hbase.FetchHBaseRow
org.apache.nifi.hbase.ScanHBase

View File

@ -1,32 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# DeleteHBaseCells
## Overview
This processor provides the ability to delete one or more HBase cells without deleting the entire row. It should be
used as the primary delete method when visibility labels are in use and the cells carry different visibility labels.
Each line in the flowfile body identifies a fully qualified cell (row id, column family, column qualifier, and
visibility labels if applicable). The separator between these components is configurable; the default is **::::**.
## Example FlowFile
```
row1::::user::::name
row1::::user::::address::::PII
row1::::user::::billing\_code\_1::::PII&&BILLING
```
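As a minimal illustrative sketch (not the processor's implementation; the class and variable names are hypothetical), this is how one such line breaks down into its components under the default **::::** separator:
```java
import java.util.regex.Pattern;

// Standalone sketch: split one flowfile line into its 3 or 4 components
// (row id, column family, column qualifier, optional visibility expression).
public class CellLineExample {
    public static void main(String[] args) {
        final String separator = "::::"; // documented default
        final String line = "row1::::user::::billing_code_1::::PII&&BILLING";
        final String[] parts = line.split(Pattern.quote(separator));
        final String rowId = parts[0];
        final String columnFamily = parts[1];
        final String columnQualifier = parts[2];
        final String visibility = parts.length > 3 ? parts[3] : null;
        System.out.printf("row=%s family=%s qualifier=%s visibility=%s%n",
                rowId, columnFamily, columnQualifier, visibility);
    }
}
```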

View File

@ -1,30 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# PutHBaseCell
## Visibility Labels
This processor provides the ability to attach visibility labels to HBase Puts that it generates, if visibility labels
are enabled on the HBase cluster. There are two ways to enable this:
* Attributes on the flowfile.
* Dynamic properties added to the processor.
When dynamic properties are defined on the processor, they act as default values that can be overridden by attributes
set on the flowfile. The naming convention for both (property name and attribute name) is listed here, with an example
following the list:
* visibility.COLUMN\_FAMILY - every column qualifier under the column family will get this label.
* visibility.COLUMN\_FAMILY.COLUMN\_QUALIFIER - only the specified column qualifier will be assigned this label.
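For example, assuming a column family named `user` and a column qualifier named `ssn` (hypothetical names used only
for illustration), flowfile attributes or dynamic properties following this convention would look like:
```
visibility.user = OPEN
visibility.user.ssn = PII&BILLING
```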

View File

@ -1,30 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# PutHBaseJSON
## Visibility Labels
This processor provides the ability to attach visibility labels to HBase Puts that it generates, if visibility labels
are enabled on the HBase cluster. There are two ways to enable this:
* Attributes on the flowfile.
* Dynamic properties added to the processor.
When dynamic properties are defined on the processor, they act as default values that can be overridden by attributes
set on the flowfile. The naming convention for both (property name and attribute name) is listed here, with an example
following the list:
* visibility.COLUMN\_FAMILY - every column qualifier under the column family will get this label.
* visibility.COLUMN\_FAMILY.COLUMN\_QUALIFIER - only the specified column qualifier will be assigned this label.
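As an illustrative sketch, dynamic properties defined on the processor for a hypothetical column family named
`account` might be:
```
visibility.account = BILLING
visibility.account.card_number = PII&BILLING
```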

View File

@ -1,102 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# PutHBaseRecord
## Visibility Labels
PutHBaseRecord provides the ability to define a branch of the record as a map that associates column qualifiers with
the visibility labels that should be assigned to them.
### Example Schema
```json
{
"type": "record",
"name": "SampleRecord",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "address",
"type": "string"
},
{
"name": "dob",
"type": "string"
},
{
"name": "attendingPhysician",
"type": "string"
},
{
"name": "accountNumber",
"type": "string"
},
{
"name": "visibility_labels",
"type": {
"type": "map",
"values": "string"
}
}
]
}
```
### Example Record
```json
{
"name": "John Smith",
"address": "12345 Main Street",
"dob": "1970-01-01",
"attendingPhysician": "Dr. Jane Doe",
"accountNumber": "1234-567-890-ABC",
"visibility_labels": {
"name": "OPEN",
"address": "PII",
"dob": "PII",
"attendingPhysician": "PII&PHI",
"accountNumber": "PII&BILLING"
}
}
```
### Results in HBase
The example below shows the results for a row with ID _patient-1_ and column family _patient_:
| Row | Value | Visibility |
|--------------------------------------|-------------------|-------------|
| patient-1:patient:name | John Smith | OPEN |
| patient-1:patient:address | 12345 Main Street | PII |
| patient-1:patient:dob | 1970-01-01 | PII |
| patient-1:patient:attendingPhysician | Dr. Jane Doe | PII&PHI |
| patient-1:patient:accountNumber | 1234-567-890-ABC | PII&BILLING |
In addition to the branch for visibility labels, the same methods used for PutHBaseCell and PutHBaseJSON can be used.
They are:
* Attributes on the flowfile.
* Dynamic properties added to the processor.
When dynamic properties are defined on the processor, they act as default values that can be overridden by attributes
set on the flowfile. The naming convention for both (property name and attribute name) is:
* visibility.COLUMN\_FAMILY - every column qualifier under the column family will get this label.
* visibility.COLUMN\_FAMILY.COLUMN\_QUALIFIER - only the specified column qualifier will be assigned this label.

View File

@ -1,57 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
public class DeleteTestBase {
protected TestRunner runner;
protected MockHBaseClientService hBaseClient;
public void setup(Class clz) throws InitializationException {
runner = TestRunners.newTestRunner(clz);
hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(DeleteHBaseRow.TABLE_NAME, "nifi");
runner.setProperty(DeleteHBaseRow.HBASE_CLIENT_SERVICE, "hbaseClient");
}
List<String> populateTable(int max) {
List<String> ids = new ArrayList<>();
for (int index = 0; index < max; index++) {
String uuid = UUID.randomUUID().toString();
ids.add(uuid);
Map<String, String> cells = new HashMap<>();
cells.put("test", UUID.randomUUID().toString());
hBaseClient.addResult(uuid, cells, System.currentTimeMillis());
}
return ids;
}
}

View File

@ -1,104 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.TestRunner;
public class HBaseTestUtil {
public static void verifyPut(final String row, final String columnFamily, final Map<String, byte[]> columns, final List<PutFlowFile> puts) {
verifyPut(row, columnFamily, null, columns, puts);
}
public static void verifyPut(final String row, final String columnFamily, final Long timestamp, final Map<String, byte[]> columns, final List<PutFlowFile> puts) {
boolean foundPut = false;
for (final PutFlowFile put : puts) {
if (!row.equals(new String(put.getRow(), StandardCharsets.UTF_8))) {
continue;
}
if (put.getColumns() == null || put.getColumns().size() != columns.size()) {
continue;
}
// start off assuming we have all the columns
boolean foundAllColumns = true;
for (Map.Entry<String, byte[]> entry : columns.entrySet()) {
// determine if we have the current expected column
boolean foundColumn = false;
for (PutColumn putColumn : put.getColumns()) {
if (columnFamily.equals(new String(putColumn.getColumnFamily(), StandardCharsets.UTF_8))
&& entry.getKey().equals(new String(putColumn.getColumnQualifier(), StandardCharsets.UTF_8))
&& Arrays.equals(entry.getValue(), putColumn.getBuffer())
&& ((timestamp == null && putColumn.getTimestamp() == null)
|| (timestamp != null && timestamp.equals(putColumn.getTimestamp())) )) {
foundColumn = true;
break;
}
}
// if we didn't have the current expected column we know we don't have all expected columns
if (!foundColumn) {
foundAllColumns = false;
break;
}
}
// if we found all the expected columns this was a match so we can break
if (foundAllColumns) {
foundPut = true;
break;
}
}
assertTrue(foundPut);
}
public static void verifyEvent(final List<ProvenanceEventRecord> events, final String uri, final ProvenanceEventType eventType) {
boolean foundEvent = false;
for (final ProvenanceEventRecord event : events) {
if (event.getTransitUri().equals(uri) && event.getEventType().equals(eventType)) {
foundEvent = true;
break;
}
}
assertTrue(foundEvent);
}
public static MockHBaseClientService getHBaseClientService(final TestRunner runner) throws InitializationException {
final MockHBaseClientService hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(PutHBaseCell.HBASE_CLIENT_SERVICE, "hbaseClient");
return hBaseClient;
}
}

View File

@ -1,360 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.HBaseRegion;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
public class MockHBaseClientService extends AbstractControllerService implements HBaseClientService {
private Map<String, ResultCell[]> results = new LinkedHashMap<>();
private Map<String, List<PutFlowFile>> flowFilePuts = new HashMap<>();
private boolean throwException = false;
private boolean throwExceptionDuringBatchDelete = false;
private int numScans = 0;
private int numPuts = 0;
private int linesBeforeException = -1;
private List<HBaseRegion> regionsToReturn = new ArrayList<>();
@Override
public void put(String tableName, Collection<PutFlowFile> puts) throws IOException {
if (throwException) {
throw new IOException("exception");
}
if (testFailure) {
if (++numPuts == failureThreshold) {
throw new IOException();
}
}
this.flowFilePuts.put(tableName, new ArrayList<>(puts));
}
@Override
public void put(String tableName, byte[] startRow, Collection<PutColumn> columns) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public boolean checkAndPut(String tableName, byte[] rowId, byte[] family, byte[] qualifier, byte[] value, PutColumn column) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void delete(String tableName, byte[] rowId) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void delete(String tableName, byte[] rowId, String visibilityLabel) throws IOException { }
private int deletePoint = 0;
public void setDeletePoint(int deletePoint) {
this.deletePoint = deletePoint;
}
@Override
public void delete(String tableName, List<byte[]> rowIds) throws IOException {
if (throwException) {
throw new RuntimeException("Simulated connectivity error");
}
int index = 0;
for (byte[] id : rowIds) {
String key = new String(id);
Object val = results.remove(key);
if (index == deletePoint && throwExceptionDuringBatchDelete) {
throw new RuntimeException("Forcing write of restart.index");
}
if (val == null && deletePoint >= 0) {
throw new RuntimeException(String.format("%s was never added.", key));
}
index++;
}
}
@Override
public void deleteCells(String tableName, List<DeleteRequest> deletes) throws IOException {
for (DeleteRequest req : deletes) {
results.remove(new String(req.getRowId()));
}
}
@Override
public void delete(String tableName, List<byte[]> rowIds, String visibilityLabel) throws IOException {
delete(tableName, rowIds);
}
public int size() {
return results.size();
}
public boolean isEmpty() {
return results.isEmpty();
}
@Override
public void scan(String tableName, byte[] startRow, byte[] endRow, Collection<Column> columns, List<String> labels, ResultHandler handler) throws IOException {
if (throwException) {
throw new IOException("exception");
}
for (final Map.Entry<String, ResultCell[]> entry : results.entrySet()) {
List<ResultCell> matchedCells = new ArrayList<>();
if (columns == null || columns.isEmpty()) {
Arrays.stream(entry.getValue()).forEach(e -> matchedCells.add(e));
} else {
for (Column column : columns) {
String colFam = new String(column.getFamily(), StandardCharsets.UTF_8);
String colQual = new String(column.getQualifier(), StandardCharsets.UTF_8);
for (ResultCell cell : entry.getValue()) {
String cellFam = new String(cell.getFamilyArray(), StandardCharsets.UTF_8);
String cellQual = new String(cell.getQualifierArray(), StandardCharsets.UTF_8);
if (colFam.equals(cellFam) && colQual.equals(cellQual)) {
matchedCells.add(cell);
}
}
}
}
handler.handle(entry.getKey().getBytes(StandardCharsets.UTF_8), matchedCells.toArray(new ResultCell[matchedCells.size()]));
}
numScans++;
}
@Override
public void scan(String tableName, Collection<Column> columns, String filterExpression, long minTime, ResultHandler handler) throws IOException {
if (throwException) {
throw new IOException("exception");
}
// pass all the staged data to the handler
for (final Map.Entry<String, ResultCell[]> entry : results.entrySet()) {
handler.handle(entry.getKey().getBytes(StandardCharsets.UTF_8), entry.getValue());
}
numScans++;
}
@Override
public void scan(String tableName, Collection<Column> columns, String filterExpression, long minTime, List<String> visibilityLabels, ResultHandler handler) throws IOException {
scan(tableName, columns, filterExpression, minTime, handler);
}
@Override
public void scan(String tableName, String startRow, String endRow, String filterExpression, Long timerangeMin,
Long timerangeMax, Integer limitRows, Boolean isReversed, Boolean blockCache, Collection<Column> columns, List<String> visibilityLabels, ResultHandler handler)
throws IOException {
if (throwException) {
throw new IOException("exception");
}
int i = 0;
// pass all the staged data to the handler
for (final Map.Entry<String, ResultCell[]> entry : results.entrySet()) {
if (linesBeforeException >= 0 && i++ >= linesBeforeException) {
throw new IOException("iterating exception");
}
handler.handle(entry.getKey().getBytes(StandardCharsets.UTF_8), entry.getValue());
}
// delegate to the handler
numScans++;
}
public void addResult(final String rowKey, final Map<String, String> cells, final long timestamp) {
final byte[] rowArray = rowKey.getBytes(StandardCharsets.UTF_8);
final ResultCell[] cellArray = new ResultCell[cells.size()];
int i = 0;
for (final Map.Entry<String, String> cellEntry : cells.entrySet()) {
final ResultCell cell = new ResultCell();
cell.setRowArray(rowArray);
cell.setRowOffset(0);
cell.setRowLength((short) rowArray.length);
final String cellValue = cellEntry.getValue();
final byte[] valueArray = cellValue.getBytes(StandardCharsets.UTF_8);
cell.setValueArray(valueArray);
cell.setValueOffset(0);
cell.setValueLength(valueArray.length);
final byte[] familyArray = "nifi".getBytes(StandardCharsets.UTF_8);
cell.setFamilyArray(familyArray);
cell.setFamilyOffset(0);
cell.setFamilyLength((byte) familyArray.length);
final String qualifier = cellEntry.getKey();
final byte[] qualifierArray = qualifier.getBytes(StandardCharsets.UTF_8);
cell.setQualifierArray(qualifierArray);
cell.setQualifierOffset(0);
cell.setQualifierLength(qualifierArray.length);
cell.setTimestamp(timestamp);
cellArray[i++] = cell;
}
results.put(rowKey, cellArray);
}
public Map<String, List<PutFlowFile>> getFlowFilePuts() {
return flowFilePuts;
}
public void setThrowException(boolean throwException) {
this.throwException = throwException;
}
public int getNumScans() {
return numScans;
}
@Override
public List<HBaseRegion> listHBaseRegions(final String tableName) throws HBaseClientException {
return regionsToReturn;
}
public void addHBaseRegion(final HBaseRegion region) {
regionsToReturn.add(region);
}
public void addHBaseRegions(final List<HBaseRegion> regions) {
regionsToReturn.addAll(regions);
}
@Override
public byte[] toBytes(final boolean b) {
return new byte[] {b ? (byte) -1 : (byte) 0};
}
@Override
public byte[] toBytes(float f) {
return toBytes((double) f);
}
@Override
public byte[] toBytes(int i) {
return toBytes((long) i);
}
@Override
public byte[] toBytes(long l) {
byte[] b = new byte[8];
for (int i = 7; i > 0; i--) {
b[i] = (byte) l;
l >>>= 8;
}
b[0] = (byte) l;
return b;
}
@Override
public byte[] toBytes(final double d) {
return toBytes(Double.doubleToRawLongBits(d));
}
@Override
public byte[] toBytes(final String s) {
return s.getBytes(StandardCharsets.UTF_8);
}
@Override
public byte[] toBytesBinary(String s) {
return convertToBytesBinary(s);
}
private boolean testFailure = false;
public void setTestFailure(boolean testFailure) {
this.testFailure = testFailure;
}
private int failureThreshold = 1;
public void setFailureThreshold(int failureThreshold) {
this.failureThreshold = failureThreshold;
}
public boolean isThrowExceptionDuringBatchDelete() {
return throwExceptionDuringBatchDelete;
}
public void setThrowExceptionDuringBatchDelete(boolean throwExceptionDuringBatchDelete) {
this.throwExceptionDuringBatchDelete = throwExceptionDuringBatchDelete;
}
public int getLinesBeforeException() {
return linesBeforeException;
}
public void setLinesBeforeException(int linesBeforeException) {
this.linesBeforeException = linesBeforeException;
}
private byte[] convertToBytesBinary(String in) {
byte[] b = new byte[in.length()];
int size = 0;
for (int i = 0; i < in.length(); ++i) {
char ch = in.charAt(i);
if (ch == '\\' && in.length() > i + 1 && in.charAt(i + 1) == 'x') {
char hd1 = in.charAt(i + 2);
char hd2 = in.charAt(i + 3);
if (isHexDigit(hd1) && isHexDigit(hd2)) {
byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
b[size++] = d;
i += 3;
}
} else {
b[size++] = (byte) ch;
}
}
byte[] b2 = new byte[size];
System.arraycopy(b, 0, b2, 0, size);
return b2;
}
private static boolean isHexDigit(char c) {
return c >= 'A' && c <= 'F' || c >= '0' && c <= '9';
}
private static byte toBinaryFromHex(byte ch) {
return ch >= 65 && ch <= 70 ? (byte) (10 + (byte) (ch - 65)) : (byte) (ch - 48);
}
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.List;
public class TestDeleteHBaseCells extends DeleteTestBase {
@BeforeEach
public void setup() throws InitializationException {
super.setup(DeleteHBaseCells.class);
}
@Test
public void testSimpleDelete() {
final String SEP = "::::";
List<String> ids = populateTable(10000);
runner.setProperty(DeleteHBaseCells.SEPARATOR, SEP);
runner.assertValid();
StringBuilder sb = new StringBuilder();
for (String id : ids) {
sb.append(String.format("%s%sX%sY\n", id, SEP, SEP));
}
runner.enqueue(sb.toString().trim());
runner.run();
runner.assertAllFlowFilesTransferred(DeleteHBaseCells.REL_SUCCESS);
}
@Test
public void testWrongNumberOfInputs() {
final String SEP = "::::";
List<String> ids = populateTable(10000);
runner.setProperty(DeleteHBaseCells.SEPARATOR, SEP);
runner.assertValid();
StringBuilder sb = new StringBuilder();
for (String id : ids) {
sb.append(String.format("%s%sX\n", id, SEP));
}
runner.enqueue(sb.toString().trim());
runner.run();
runner.assertAllFlowFilesTransferred(DeleteHBaseCells.REL_FAILURE);
MockFlowFile flowFile = runner.getFlowFilesForRelationship(DeleteHBaseCells.REL_FAILURE).get(0);
flowFile.assertAttributeEquals(DeleteHBaseCells.ERROR_MSG, "Invalid line length. It must have 3 or 4 components. It had 2.");
}
}

View File

@ -1,172 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestDeleteHBaseRow extends DeleteTestBase {
@BeforeEach
public void setup() throws InitializationException {
super.setup(DeleteHBaseRow.class);
}
@Test
public void testSimpleDelete() {
List<String> ids = populateTable(100);
runner.setProperty(DeleteHBaseRow.BATCH_SIZE, "100");
runner.setProperty(DeleteHBaseRow.FLOWFILE_FETCH_COUNT, "100");
for (String id : ids) {
runner.enqueue(id);
}
runner.run(1, true);
assertTrue(hBaseClient.isEmpty(), "The mock client was not empty.");
}
private String buildSeparatedString(List<String> ids, String separator) {
StringBuilder sb = new StringBuilder();
for (int index = 1; index <= ids.size(); index++) {
sb.append(ids.get(index - 1)).append(separator);
}
return sb.toString();
}
private void testSeparatedDeletes(String separator) {
testSeparatedDeletes(separator, separator, new HashMap<>());
}
private void testSeparatedDeletes(String separator, String separatorProp, Map<String, String> attrs) {
List<String> ids = populateTable(10000);
runner.setProperty(DeleteHBaseRow.KEY_SEPARATOR, separator);
runner.setProperty(DeleteHBaseRow.BATCH_SIZE, "100");
runner.enqueue(buildSeparatedString(ids, separatorProp), attrs);
runner.run(1, true);
assertTrue(hBaseClient.isEmpty(), "The mock client was not empty.");
}
@Test
public void testDeletesSeparatedByNewLines() {
testSeparatedDeletes("\n");
}
@Test
public void testDeletesSeparatedByCommas() {
testSeparatedDeletes(",");
}
@Test
public void testDeleteWithELSeparator() {
Map<String, String> attrs = new HashMap<>();
attrs.put("test.separator", "____");
testSeparatedDeletes("${test.separator}", "____", attrs);
}
@Test
public void testDeleteWithExpressionLanguage() {
List<String> ids = populateTable(1000);
for (String id : ids) {
String[] parts = id.split("-");
Map<String, String> attrs = new HashMap<>();
for (int index = 0; index < parts.length; index++) {
attrs.put(String.format("part_%d", index), parts[index]);
}
runner.enqueue(id, attrs);
}
runner.setProperty(DeleteHBaseRow.ROW_ID, "${part_0}-${part_1}-${part_2}-${part_3}-${part_4}");
runner.setProperty(DeleteHBaseRow.ROW_ID_LOCATION, DeleteHBaseRow.ROW_ID_ATTR);
runner.setProperty(DeleteHBaseRow.BATCH_SIZE, "200");
runner.run(1, true);
}
@Test
public void testConnectivityErrorHandling() {
List<String> ids = populateTable(100);
for (String id : ids) {
runner.enqueue(id);
}
boolean exception = false;
try {
hBaseClient.setThrowException(true);
runner.run(1, true);
} catch (Exception ex) {
exception = true;
} finally {
hBaseClient.setThrowException(false);
}
assertFalse(exception, "An unhandled exception was caught.");
}
@Test
public void testRestartIndexAttribute() {
List<String> ids = populateTable(500);
StringBuilder sb = new StringBuilder();
for (int index = 0; index < ids.size(); index++) {
sb.append(ids.get(index)).append( index < ids.size() - 1 ? "," : "");
}
runner.enqueue(sb.toString());
runner.setProperty(DeleteHBaseRow.ROW_ID_LOCATION, DeleteHBaseRow.ROW_ID_CONTENT);
assertTrue(hBaseClient.size() == 500, "There should have been 500 rows.");
hBaseClient.setDeletePoint(20);
hBaseClient.setThrowExceptionDuringBatchDelete(true);
runner.run(1, true, true);
runner.assertTransferCount(DeleteHBaseRow.REL_FAILURE, 1);
runner.assertTransferCount(DeleteHBaseRow.REL_SUCCESS, 0);
assertTrue(hBaseClient.size() < 500, "Partially deleted");
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(DeleteHBaseRow.REL_FAILURE);
assertNotNull(flowFile.get(0).getAttribute("restart.index"), "Missing restart.index attribute");
byte[] oldData = runner.getContentAsByteArray(flowFile.get(0));
Map<String, String> attrs = new HashMap<>();
attrs.put("restart.index", flowFile.get(0).getAttribute("restart.index"));
runner.enqueue(oldData, attrs);
hBaseClient.setDeletePoint(-1);
hBaseClient.setThrowExceptionDuringBatchDelete(false);
runner.clearTransferState();
runner.run(1, true, true);
runner.assertTransferCount(DeleteHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(DeleteHBaseRow.REL_SUCCESS, 1);
flowFile = runner.getFlowFilesForRelationship(DeleteHBaseRow.REL_SUCCESS);
assertTrue(hBaseClient.isEmpty(), "The client should have been empty");
assertNull(flowFile.get(0).getAttribute("restart.index"), "The restart.index attribute should be null");
}
}

View File

@ -1,411 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.commons.codec.binary.Base64;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestFetchHBaseRow {
private FetchHBaseRow proc;
private MockHBaseClientService hBaseClientService;
private TestRunner runner;
@BeforeEach
public void setup() throws InitializationException {
proc = new FetchHBaseRow();
runner = TestRunners.newTestRunner(proc);
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
hBaseClientService = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClientService);
runner.enableControllerService(hBaseClientService);
runner.setProperty(FetchHBaseRow.HBASE_CLIENT_SERVICE, "hbaseClient");
}
@Test
public void testColumnsValidation() {
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.assertValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1:cq1");
runner.assertValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1");
runner.assertValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1:cq1,cf2:cq2,cf3:cq3");
runner.assertValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1,cf2:cq1,cf3");
runner.assertValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1 cf2,cf3");
runner.assertNotValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1:,cf2,cf3");
runner.assertNotValid();
runner.setProperty(FetchHBaseRow.COLUMNS, "cf1:cq1,");
runner.assertNotValid();
}
@Test
public void testNoIncomingFlowFile() {
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 0);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testInvalidTableName() {
runner.setProperty(FetchHBaseRow.TABLE_NAME, "${hbase.table}");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 1);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 0);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testInvalidRowId() {
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "${hbase.row}");
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 1);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 0);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testFetchToAttributesWithStringValues() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_ATTRIBUTES);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertAttributeEquals(FetchHBaseRow.HBASE_ROW_ATTR,
"{\"row\":\"row1\", \"cells\": [" +
"{\"fam\":\"nifi\",\"qual\":\"cq1\",\"val\":\"val1\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchSpecificColumnsToAttributesWithStringValues() {
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.AUTHORIZATIONS, "");
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.COLUMNS, "nifi:cq2");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_ATTRIBUTES);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertAttributeEquals(FetchHBaseRow.HBASE_ROW_ATTR,
"{\"row\":\"row1\", \"cells\": [{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchToAttributesWithBase64Values() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_ATTRIBUTES);
runner.setProperty(FetchHBaseRow.JSON_VALUE_ENCODING, FetchHBaseRow.ENCODING_BASE64);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final String rowBase64 = Base64.encodeBase64String("row1".getBytes(StandardCharsets.UTF_8));
final String fam1Base64 = Base64.encodeBase64String("nifi".getBytes(StandardCharsets.UTF_8));
final String qual1Base64 = Base64.encodeBase64String("cq1".getBytes(StandardCharsets.UTF_8));
final String val1Base64 = Base64.encodeBase64String("val1".getBytes(StandardCharsets.UTF_8));
final String fam2Base64 = Base64.encodeBase64String("nifi".getBytes(StandardCharsets.UTF_8));
final String qual2Base64 = Base64.encodeBase64String("cq2".getBytes(StandardCharsets.UTF_8));
final String val2Base64 = Base64.encodeBase64String("val2".getBytes(StandardCharsets.UTF_8));
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertAttributeEquals(FetchHBaseRow.HBASE_ROW_ATTR,
"{\"row\":\"" + rowBase64 + "\", \"cells\": [" +
"{\"fam\":\"" + fam1Base64 + "\",\"qual\":\"" + qual1Base64 + "\",\"val\":\"" + val1Base64 + "\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"" + fam2Base64 + "\",\"qual\":\"" + qual2Base64 + "\",\"val\":\"" + val2Base64 + "\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchToAttributesNoResults() {
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_ATTRIBUTES);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 0);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 1);
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchToContentWithStringValues() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_CONTENT);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertContentEquals("{\"row\":\"row1\", \"cells\": [" +
"{\"fam\":\"nifi\",\"qual\":\"cq1\",\"val\":\"val1\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchSpecificColumnsToContentWithStringValues() {
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_CONTENT);
runner.setProperty(FetchHBaseRow.COLUMNS, "nifi:cq2");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertContentEquals("{\"row\":\"row1\", \"cells\": [{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchSpecificColumnsToContentWithBase64() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_CONTENT);
runner.setProperty(FetchHBaseRow.JSON_VALUE_ENCODING, FetchHBaseRow.ENCODING_BASE64);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final String rowBase64 = Base64.encodeBase64String("row1".getBytes(StandardCharsets.UTF_8));
final String fam1Base64 = Base64.encodeBase64String("nifi".getBytes(StandardCharsets.UTF_8));
final String qual1Base64 = Base64.encodeBase64String("cq1".getBytes(StandardCharsets.UTF_8));
final String val1Base64 = Base64.encodeBase64String("val1".getBytes(StandardCharsets.UTF_8));
final String fam2Base64 = Base64.encodeBase64String("nifi".getBytes(StandardCharsets.UTF_8));
final String qual2Base64 = Base64.encodeBase64String("cq2".getBytes(StandardCharsets.UTF_8));
final String val2Base64 = Base64.encodeBase64String("val2".getBytes(StandardCharsets.UTF_8));
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertContentEquals("{\"row\":\"" + rowBase64 + "\", \"cells\": [" +
"{\"fam\":\"" + fam1Base64 + "\",\"qual\":\"" + qual1Base64 + "\",\"val\":\"" + val1Base64 + "\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"" + fam2Base64 + "\",\"qual\":\"" + qual2Base64 + "\",\"val\":\"" + val2Base64 + "\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchToContentWithQualifierAndValueJSON() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
hBaseClientService.addResult("row1", cells, System.currentTimeMillis());
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_CONTENT);
runner.setProperty(FetchHBaseRow.JSON_FORMAT, FetchHBaseRow.JSON_FORMAT_QUALIFIER_AND_VALUE);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertContentEquals("{\"cq1\":\"val1\", \"cq2\":\"val2\"}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchWithExpressionLanguage() {
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "${hbase.table}");
runner.setProperty(FetchHBaseRow.ROW_ID, "${hbase.row}");
runner.setProperty(FetchHBaseRow.COLUMNS, "${hbase.cols}");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_CONTENT);
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "table1");
attributes.put("hbase.row", "row1");
attributes.put("hbase.cols", "nifi:cq2");
runner.enqueue("trigger flow file", attributes);
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 0);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 1);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(FetchHBaseRow.REL_SUCCESS).get(0);
flowFile.assertContentEquals("{\"row\":\"row1\", \"cells\": [{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testFetchWhenScanThrowsException() {
hBaseClientService.setThrowException(true);
runner.setProperty(FetchHBaseRow.TABLE_NAME, "table1");
runner.setProperty(FetchHBaseRow.ROW_ID, "row1");
runner.setProperty(FetchHBaseRow.DESTINATION, FetchHBaseRow.DESTINATION_ATTRIBUTES);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(FetchHBaseRow.REL_FAILURE, 1);
runner.assertTransferCount(FetchHBaseRow.REL_SUCCESS, 0);
runner.assertTransferCount(FetchHBaseRow.REL_NOT_FOUND, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
}

View File

@ -1,376 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.hbase.GetHBase.ScanResult;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestGetHBase {
private TestRunner runner;
private MockGetHBase proc;
private MockCacheClient cacheClient;
private MockHBaseClientService hBaseClient;
@BeforeEach
public void setup() throws InitializationException {
proc = new MockGetHBase();
runner = TestRunners.newTestRunner(proc);
cacheClient = new MockCacheClient();
runner.addControllerService("cacheClient", cacheClient);
runner.enableControllerService(cacheClient);
hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(GetHBase.TABLE_NAME, "nifi");
runner.setProperty(GetHBase.HBASE_CLIENT_SERVICE, "hbaseClient");
runner.setProperty(GetHBase.AUTHORIZATIONS, "");
runner.setValidateExpressionUsage(true);
}
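// The Columns property accepts comma-separated families or family:qualifier pairs; malformed entries are invalid.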
@Test
public void testColumnsValidation() {
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "cf1:cq1");
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "cf1");
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "cf1:cq1,cf2:cq2,cf3:cq3");
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "cf1,cf2:cq1,cf3");
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "cf1 cf2,cf3");
runner.assertNotValid();
runner.setProperty(GetHBase.COLUMNS, "cf1:,cf2,cf3");
runner.assertNotValid();
runner.setProperty(GetHBase.COLUMNS, "cf1:cq1,");
runner.assertNotValid();
}
@Test
public void testRowCounts() {
final long now = System.currentTimeMillis();
final Map<String, String> cells = new HashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 4);
hBaseClient.addResult("row4", cells, now + 1);
runner.run();
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 5);
}
@Test
public void testPersistAndRecoverFromLocalState() {
final long now = System.currentTimeMillis();
final Map<String, String> cells = new HashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 4);
hBaseClient.addResult("row4", cells, now + 1);
runner.run();
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 5);
runner.clearTransferState();
proc = new MockGetHBase();
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 0);
}
@Test
public void testBecomePrimaryWithNoLocalState() {
final long now = System.currentTimeMillis();
final Map<String, String> cells = new HashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 4);
hBaseClient.addResult("row4", cells, now + 1);
runner.run();
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 5);
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
hBaseClient.addResult("row4", cells, now + 1);
runner.clearTransferState();
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 0);
}
@Test
public void testChangeTableNameClearsState() {
final long now = System.currentTimeMillis();
final Map<String, String> cells = new HashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 4);
// change the table name and run again, should get all the data coming out
// again because previous state will be wiped
runner.setProperty(GetHBase.TABLE_NAME, "otherTable");
hBaseClient.addResult("row0", cells, now - 2);
hBaseClient.addResult("row1", cells, now - 1);
hBaseClient.addResult("row2", cells, now - 1);
hBaseClient.addResult("row3", cells, now);
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 4);
}
@Test
public void testInitialTimeCurrentTime() {
runner.setProperty(GetHBase.INITIAL_TIMERANGE, GetHBase.CURRENT_TIME);
final long now = System.currentTimeMillis();
final Map<String, String> cells = new HashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
hBaseClient.addResult("row0", cells, now - 4000);
hBaseClient.addResult("row1", cells, now - 3000);
hBaseClient.addResult("row2", cells, now - 2000);
hBaseClient.addResult("row3", cells, now - 1000);
// should not get any output because the mock results have a time before current time
runner.run(100);
runner.assertAllFlowFilesTransferred(GetHBase.REL_SUCCESS, 0);
}
@Test
public void testParseColumns() throws IOException {
runner.setProperty(GetHBase.COLUMNS, "cf1,cf2:cq1,cf3");
proc.parseColumns(runner.getProcessContext());
final List<Column> expectedCols = new ArrayList<>();
expectedCols.add(new Column("cf1".getBytes(Charset.forName("UTF-8")), null));
expectedCols.add(new Column("cf2".getBytes(Charset.forName("UTF-8")), "cq1".getBytes(Charset.forName("UTF-8"))));
expectedCols.add(new Column("cf3".getBytes(Charset.forName("UTF-8")), null));
final List<Column> actualColumns = proc.getColumns();
assertNotNull(actualColumns);
assertEquals(expectedCols.size(), actualColumns.size());
for (final Column expectedCol : expectedCols) {
boolean found = false;
for (final Column providedCol : actualColumns) {
if (expectedCol.equals(providedCol)) {
found = true;
break;
}
}
assertTrue(found, "Didn't find expected column");
}
}
@Test
public void testCustomValidate() throws CharacterCodingException {
runner.setProperty(GetHBase.FILTER_EXPRESSION, "PrefixFilter ('Row') AND PageFilter (1) AND FirstKeyOnlyFilter ()");
runner.assertValid();
runner.setProperty(GetHBase.COLUMNS, "colA");
runner.assertNotValid();
}
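// ScanResult state should survive a round trip through its flat map representation.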
@Test
public void testScanResultConvert() {
final long timestamp = 14L;
final Map<String, Set<String>> cellHashes = new LinkedHashMap<>();
final Set<String> row1Cells = new HashSet<>();
row1Cells.add("hello");
row1Cells.add("there");
cellHashes.put("abc", row1Cells);
final Set<String> row2Cells = new HashSet<>();
row2Cells.add("good-bye");
row2Cells.add("there");
cellHashes.put("xyz", row2Cells);
final ScanResult scanResult = new GetHBase.ScanResult(timestamp, cellHashes);
final Map<String, String> flatMap = scanResult.toFlatMap();
assertEquals(7, flatMap.size());
assertEquals("abc", flatMap.get("row.0"));
final String row0Cell0 = flatMap.get("row.0.0");
final String row0Cell1 = flatMap.get("row.0.1");
assertTrue(row0Cell0.equals("hello") || row0Cell0.equals("there"));
assertTrue(row0Cell1.equals("hello") || row0Cell1.equals("there"));
assertNotSame(row0Cell0, row0Cell1);
assertEquals("xyz", flatMap.get("row.1"));
final String row1Cell0 = flatMap.get("row.1.0");
final String row1Cell1 = flatMap.get("row.1.1");
assertTrue(row1Cell0.equals("good-bye") || row1Cell0.equals("there"));
assertTrue(row1Cell1.equals("good-bye") || row1Cell1.equals("there"));
assertNotSame(row1Cell0, row1Cell1);
final ScanResult reverted = ScanResult.fromFlatMap(flatMap);
assertEquals(timestamp, reverted.getTimestamp());
assertEquals(cellHashes, reverted.getMatchingCells());
}
// Mock processor that overrides the batch size used when scanning
private static class MockGetHBase extends GetHBase {
@Override
protected int getBatchSize() {
return 2;
}
}
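// In-memory DistributedMapCacheClient backed by a ConcurrentHashMap for use in these tests.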
private class MockCacheClient extends AbstractControllerService implements DistributedMapCacheClient {
private final ConcurrentMap<Object, Object> values = new ConcurrentHashMap<>();
private boolean failOnCalls = false;
private void verifyNotFail() throws IOException {
if (failOnCalls) {
throw new IOException("Could not communicate with the remote cacheClient because the unit test marked it unavailable");
}
}
@Override
public <K, V> boolean putIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException {
verifyNotFail();
final Object retValue = values.putIfAbsent(key, value);
return (retValue == null);
}
@Override
@SuppressWarnings("unchecked")
public <K, V> V getAndPutIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer,
final Deserializer<V> valueDeserializer) throws IOException {
verifyNotFail();
return (V) values.putIfAbsent(key, value);
}
@Override
public <K> boolean containsKey(final K key, final Serializer<K> keySerializer) throws IOException {
verifyNotFail();
return values.containsKey(key);
}
@Override
public <K, V> void put(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException {
verifyNotFail();
values.put(key, value);
}
@Override
@SuppressWarnings("unchecked")
public <K, V> V get(final K key, final Serializer<K> keySerializer, final Deserializer<V> valueDeserializer) throws IOException {
verifyNotFail();
return (V) values.get(key);
}
@Override
public void close() {
}
@Override
public <K> boolean remove(final K key, final Serializer<K> serializer) throws IOException {
verifyNotFail();
values.remove(key);
return true;
}
}
}


@ -1,209 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.hbase.scan.HBaseRegion;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ExtendWith(MockitoExtension.class)
public class TestListHBaseRegions {
private static final String TABLE_NAME = "nifi";
private static final String HBASE_CLIENT_SERVICE_NAME = "hBaseClientService";
private TestRunner runner;
private ListHBaseRegions proc;
private MockHBaseClientService hBaseClientService;
@BeforeEach
public void setup() throws InitializationException {
proc = new ListHBaseRegions();
runner = TestRunners.newTestRunner(proc);
hBaseClientService = new MockHBaseClientService();
runner.addControllerService(HBASE_CLIENT_SERVICE_NAME, hBaseClientService);
runner.enableControllerService(hBaseClientService);
runner.setProperty(ListHBaseRegions.TABLE_NAME, TABLE_NAME);
runner.setProperty(ListHBaseRegions.HBASE_CLIENT_SERVICE, HBASE_CLIENT_SERVICE_NAME);
}
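// With degenerate-region routing disabled, every region is transferred to the success relationship.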
@Test
public void testAllFlowFilesToSuccess() throws HBaseClientException {
runner.setProperty(ListHBaseRegions.ROUTE_DEGENERATE_REGIONS, "false");
runner.assertValid();
final String startRowKey1 = "1";
final String endRowKey1 = "5";
final String regionName1 = "region-1";
final long regionId1 = 1L;
final boolean isDegenerate1 = false;
final HBaseRegion hBaseRegion1 = new HBaseRegion(
startRowKey1.getBytes(StandardCharsets.UTF_8),
endRowKey1.getBytes(StandardCharsets.UTF_8),
regionName1,
regionId1,
isDegenerate1
);
// this is a "degenerate" region where startRowKey > endRowKey
final String startRowKey2 = "10";
final String endRowKey2 = "6";
final String regionName2 = "region-2";
final long regionId2 = 2L;
final boolean isDegenerate2 = true;
final HBaseRegion hBaseRegion2 = new HBaseRegion(
startRowKey2.getBytes(StandardCharsets.UTF_8),
endRowKey2.getBytes(StandardCharsets.UTF_8),
regionName2,
regionId2,
isDegenerate2
);
final List<HBaseRegion> regions = Arrays.asList(hBaseRegion1, hBaseRegion2);
hBaseClientService.addHBaseRegions(regions);
runner.run(1);
runner.assertAllFlowFilesTransferred(ListHBaseRegions.REL_SUCCESS, 2);
final List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(ListHBaseRegions.REL_SUCCESS);
assertEquals(String.valueOf(regionId1), flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
assertEquals(String.valueOf(regionId2), flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
}
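// With degenerate-region routing enabled, regions whose start row key sorts after their end row key go to the degenerate relationship.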
@Test
public void testDegenerateRegionsToDegenerateRelationship() throws HBaseClientException {
runner.setProperty(ListHBaseRegions.ROUTE_DEGENERATE_REGIONS, "true");
runner.assertValid();
final String startRowKey1 = "1";
final String endRowKey1 = "5";
final String regionName1 = "region-1";
final long regionId1 = 1L;
final boolean isDegenerate1 = false;
final HBaseRegion hBaseRegion1 = new HBaseRegion(
startRowKey1.getBytes(StandardCharsets.UTF_8),
endRowKey1.getBytes(StandardCharsets.UTF_8),
regionName1,
regionId1,
isDegenerate1
);
// this is a "degenerate" region where startRowKey > endRowKey
final String startRowKey2 = "10";
final String endRowKey2 = "6";
final String regionName2 = "region-2";
final long regionId2 = 2L;
final boolean isDegenerate2 = true;
final HBaseRegion hBaseRegion2 = new HBaseRegion(
startRowKey2.getBytes(StandardCharsets.UTF_8),
endRowKey2.getBytes(StandardCharsets.UTF_8),
regionName2,
regionId2,
isDegenerate2
);
final List<HBaseRegion> regions = Arrays.asList(hBaseRegion1, hBaseRegion2);
hBaseClientService.addHBaseRegions(regions);
runner.run(1);
runner.assertTransferCount(ListHBaseRegions.REL_SUCCESS, 1);
final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(ListHBaseRegions.REL_SUCCESS);
assertEquals(String.valueOf(regionId1), successFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName1, successFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey1, successFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey1, successFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
runner.assertTransferCount(ListHBaseRegions.REL_DEGENERATE, 1);
final List<MockFlowFile> degenerateFlowFiles = runner.getFlowFilesForRelationship(ListHBaseRegions.REL_DEGENERATE);
assertEquals(String.valueOf(regionId2), degenerateFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName2, degenerateFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey2, degenerateFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey2, degenerateFlowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
}
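// When all regions are well-formed, nothing should be routed to the degenerate relationship.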
@Test
public void testShouldNotRouteToDegenerateIfNoDegenerateRegions() throws HBaseClientException {
runner.setProperty(ListHBaseRegions.ROUTE_DEGENERATE_REGIONS, "false");
runner.assertValid();
final String startRowKey1 = "1";
final String endRowKey1 = "5";
final String regionName1 = "region-1";
final long regionId1 = 1L;
final boolean isDegenerate1 = false;
final HBaseRegion hBaseRegion1 = new HBaseRegion(
startRowKey1.getBytes(StandardCharsets.UTF_8),
endRowKey1.getBytes(StandardCharsets.UTF_8),
regionName1,
regionId1,
isDegenerate1
);
final String startRowKey2 = "5";
final String endRowKey2 = "10";
final String regionName2 = "region-2";
final long regionId2 = 2L;
final boolean isDegenerate2 = false;
final HBaseRegion hBaseRegion2 = new HBaseRegion(
startRowKey2.getBytes(StandardCharsets.UTF_8),
endRowKey2.getBytes(StandardCharsets.UTF_8),
regionName2,
regionId2,
isDegenerate2
);
final List<HBaseRegion> regions = Arrays.asList(hBaseRegion1, hBaseRegion2);
hBaseClientService.addHBaseRegions(regions);
runner.run(1);
runner.assertAllFlowFilesTransferred(ListHBaseRegions.REL_SUCCESS, 2);
final List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(ListHBaseRegions.REL_SUCCESS);
assertEquals(String.valueOf(regionId1), flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey1, flowFiles.get(0).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
assertEquals(String.valueOf(regionId2), flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_ID_ATTR));
assertEquals(regionName2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_NAME_ATTR));
assertEquals(startRowKey2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_START_ROW_ATTR));
assertEquals(endRowKey2, flowFiles.get(1).getAttribute(ListHBaseRegions.HBASE_REGION_END_ROW_ATTR));
}
}


@ -1,409 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
public class TestPutHBaseCell {
@Test
public void testSingleFlowFileNoTimestamp() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final TestRunner runner = TestRunners.newTestRunner(PutHBaseCell.class);
runner.setProperty(PutHBaseCell.TABLE_NAME, tableName);
runner.setProperty(PutHBaseCell.ROW_ID, row);
runner.setProperty(PutHBaseCell.COLUMN_FAMILY, columnFamily);
runner.setProperty(PutHBaseCell.COLUMN_QUALIFIER, columnQualifier);
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content = "some content";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(1, puts.size());
verifyPut(row, columnFamily, columnQualifier, null, content, puts.get(0));
assertEquals(1, runner.getProvenanceEvents().size());
}
@Test
public void testSingleFlowFileWithTimestamp() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final Long timestamp = 1L;
final TestRunner runner = TestRunners.newTestRunner(PutHBaseCell.class);
runner.setProperty(PutHBaseCell.TABLE_NAME, tableName);
runner.setProperty(PutHBaseCell.ROW_ID, row);
runner.setProperty(PutHBaseCell.COLUMN_FAMILY, columnFamily);
runner.setProperty(PutHBaseCell.COLUMN_QUALIFIER, columnQualifier);
runner.setProperty(PutHBaseCell.TIMESTAMP, timestamp.toString());
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content = "some content";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(1, puts.size());
verifyPut(row, columnFamily, columnQualifier, timestamp, content, puts.get(0));
assertEquals(1, runner.getProvenanceEvents().size());
}
@Test
public void testSingleFlowFileWithInvalidTimestamp() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final String timestamp = "not-a-timestamp";
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
runner.setProperty(PutHBaseCell.TIMESTAMP, "${hbase.timestamp}");
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content = "some content";
final Map<String, String> attributes = getAttributeMapWithEL(tableName, row, columnFamily, columnQualifier);
attributes.put("hbase.timestamp", timestamp);
runner.enqueue(content.getBytes("UTF-8"), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
}
@Test
public void testSingleFlowFileWithEL() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final Long timestamp = 1L;
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
runner.setProperty(PutHBaseCell.TIMESTAMP, "${hbase.timestamp}");
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content = "some content";
final Map<String, String> attributes = getAttributeMapWithEL(tableName, row, columnFamily, columnQualifier);
attributes.put("hbase.timestamp", timestamp.toString());
runner.enqueue(content.getBytes("UTF-8"), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(1, puts.size());
verifyPut(row, columnFamily, columnQualifier, timestamp, content, puts.get(0));
assertEquals(1, runner.getProvenanceEvents().size());
}
@Test
public void testSingleFlowFileWithELMissingAttributes() throws IOException, InitializationException {
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(PutHBaseCell.HBASE_CLIENT_SERVICE, "hbaseClient");
getHBaseClientService(runner);
final String content = "some content";
runner.enqueue(content.getBytes("UTF-8"), new HashMap<String, String>());
runner.run();
runner.assertTransferCount(PutHBaseCell.REL_SUCCESS, 0);
runner.assertTransferCount(PutHBaseCell.REL_FAILURE, 1);
assertEquals(0, runner.getProvenanceEvents().size());
}
@Test
public void testMultipleFlowFileWithELOneMissingAttributes() throws IOException, InitializationException {
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
runner.setProperty(PutHBaseCell.BATCH_SIZE, "10");
final MockHBaseClientService hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(PutHBaseCell.HBASE_CLIENT_SERVICE, "hbaseClient");
getHBaseClientService(runner);
// this one will go to failure
final String content = "some content";
runner.enqueue(content.getBytes("UTF-8"), new HashMap<String, String>());
// this will go to success
final String content2 = "some content2";
final Map<String, String> attributes = getAttributeMapWithEL("table", "row", "cf", "cq");
runner.enqueue(content2.getBytes("UTF-8"), attributes);
runner.run();
runner.assertTransferCount(PutHBaseCell.REL_SUCCESS, 1);
runner.assertTransferCount(PutHBaseCell.REL_FAILURE, 1);
assertEquals(1, runner.getProvenanceEvents().size());
}
@Test
public void testMultipleFlowFilesSameTableDifferentRow() throws IOException, InitializationException {
final String tableName = "nifi";
final String row1 = "row1";
final String row2 = "row2";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content1 = "some content1";
final Map<String, String> attributes1 = getAttributeMapWithEL(tableName, row1, columnFamily, columnQualifier);
runner.enqueue(content1.getBytes("UTF-8"), attributes1);
final String content2 = "some content1";
final Map<String, String> attributes2 = getAttributeMapWithEL(tableName, row2, columnFamily, columnQualifier);
runner.enqueue(content2.getBytes("UTF-8"), attributes2);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content1);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(2, puts.size());
verifyPut(row1, columnFamily, columnQualifier, null, content1, puts.get(0));
verifyPut(row2, columnFamily, columnQualifier, null, content2, puts.get(1));
assertEquals(2, runner.getProvenanceEvents().size());
}
@Test
public void testMultipleFlowFilesSameTableDifferentRowFailure() throws IOException, InitializationException {
final String tableName = "nifi";
final String row1 = "row1";
final String row2 = "row2";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
hBaseClient.setThrowException(true);
final String content1 = "some content1";
final Map<String, String> attributes1 = getAttributeMapWithEL(tableName, row1, columnFamily, columnQualifier);
runner.enqueue(content1.getBytes("UTF-8"), attributes1);
final String content2 = "some content1";
final Map<String, String> attributes2 = getAttributeMapWithEL(tableName, row2, columnFamily, columnQualifier);
runner.enqueue(content2.getBytes("UTF-8"), attributes2);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 2);
assertEquals(0, runner.getProvenanceEvents().size());
}
@Test
public void testMultipleFlowFilesSameTableSameRow() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final PutHBaseCell proc = new PutHBaseCell();
final TestRunner runner = getTestRunnerWithEL(proc);
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final String content1 = "some content1";
final Map<String, String> attributes1 = getAttributeMapWithEL(tableName, row, columnFamily, columnQualifier);
runner.enqueue(content1.getBytes("UTF-8"), attributes1);
final String content2 = "some content1";
runner.enqueue(content2.getBytes("UTF-8"), attributes1);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content1);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(2, puts.size());
verifyPut(row, columnFamily, columnQualifier, null, content1, puts.get(0));
verifyPut(row, columnFamily, columnQualifier, null, content2, puts.get(1));
assertEquals(2, runner.getProvenanceEvents().size());
}
@Test
public void testSingleFlowFileWithBinaryRowKey() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x01\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x01\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00" +
"\\x01\\x00\\x00\\x01\\x00\\x00\\x00\\x00\\x01\\x01\\x01\\x00\\x01\\x00\\x01\\x01\\x01\\x00\\x00\\x00" +
"\\x00\\x00\\x00\\x01\\x01\\x01\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x01\\x00\\x01\\x00\\x01\\x00" +
"\\x00\\x01\\x01\\x01\\x01\\x00\\x00\\x01\\x01\\x01\\x00\\x01\\x00\\x00";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final TestRunner runner = TestRunners.newTestRunner(PutHBaseCell.class);
runner.setProperty(PutHBaseCell.TABLE_NAME, tableName);
runner.setProperty(PutHBaseCell.ROW_ID, row);
runner.setProperty(PutHBaseCell.ROW_ID_ENCODING_STRATEGY, PutHBaseCell.ROW_ID_ENCODING_BINARY.getValue());
runner.setProperty(PutHBaseCell.COLUMN_FAMILY, columnFamily);
runner.setProperty(PutHBaseCell.COLUMN_QUALIFIER, columnQualifier);
runner.setProperty(PutHBaseCell.BATCH_SIZE, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
final byte[] expectedRowKey = hBaseClient.toBytesBinary(row);
final String content = "some content";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
assertEquals(1, puts.size());
verifyPut(expectedRowKey, columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), null, content, puts.get(0));
assertEquals(1, runner.getProvenanceEvents().size());
}
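// Builds the FlowFile attributes referenced by the Expression Language property values used in these tests.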
private Map<String, String> getAttributeMapWithEL(String tableName, String row, String columnFamily, String columnQualifier) {
final Map<String, String> attributes1 = new HashMap<>();
attributes1.put("hbase.tableName", tableName);
attributes1.put("hbase.row", row);
attributes1.put("hbase.columnFamily", columnFamily);
attributes1.put("hbase.columnQualifier", columnQualifier);
return attributes1;
}
private TestRunner getTestRunnerWithEL(PutHBaseCell proc) {
final TestRunner runner = TestRunners.newTestRunner(proc);
runner.setProperty(PutHBaseCell.TABLE_NAME, "${hbase.tableName}");
runner.setProperty(PutHBaseCell.ROW_ID, "${hbase.row}");
runner.setProperty(PutHBaseCell.COLUMN_FAMILY, "${hbase.columnFamily}");
runner.setProperty(PutHBaseCell.COLUMN_QUALIFIER, "${hbase.columnQualifier}");
return runner;
}
private MockHBaseClientService getHBaseClientService(TestRunner runner) throws InitializationException {
final MockHBaseClientService hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(PutHBaseCell.HBASE_CLIENT_SERVICE, "hbaseClient");
return hBaseClient;
}
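// Asserts that the put targets the expected row and contains a single column with the expected family, qualifier, timestamp, and content.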
private void verifyPut(String row, String columnFamily, String columnQualifier, Long timestamp, String content, PutFlowFile put) {
verifyPut(row.getBytes(StandardCharsets.UTF_8), columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8), timestamp, content, put);
}
private void verifyPut(byte[] row, byte[] columnFamily, byte[] columnQualifier, Long timestamp, String content, PutFlowFile put) {
assertEquals(new String(row, StandardCharsets.UTF_8), new String(put.getRow(), StandardCharsets.UTF_8));
assertNotNull(put.getColumns());
assertEquals(1, put.getColumns().size());
final PutColumn column = put.getColumns().iterator().next();
assertEquals(new String(columnFamily, StandardCharsets.UTF_8), new String(column.getColumnFamily(), StandardCharsets.UTF_8));
assertEquals(new String(columnQualifier, StandardCharsets.UTF_8), new String(column.getColumnQualifier(), StandardCharsets.UTF_8));
assertEquals(content, new String(column.getBuffer(), StandardCharsets.UTF_8));
assertEquals(timestamp, column.getTimestamp());
}
}


@ -1,511 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.apache.nifi.hbase.HBaseTestUtil.getHBaseClientService;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
public class TestPutHBaseJSON {
public static final String DEFAULT_TABLE_NAME = "nifi";
public static final String DEFAULT_ROW = "row1";
public static final String DEFAULT_COLUMN_FAMILY = "family1";
public static final Long DEFAULT_TIMESTAMP = 1L;
@Test
public void testCustomValidate() throws InitializationException {
// missing row id and row id field name should be invalid
TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
getHBaseClientService(runner);
runner.assertNotValid();
// setting both properties should still be invalid
runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, "rowId");
runner.setProperty(PutHBaseJSON.ROW_FIELD_NAME, "rowFieldName");
runner.assertNotValid();
// only a row id field name should make it valid
runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_FIELD_NAME, "rowFieldName");
runner.assertValid();
// only a row id should make it valid
runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, "rowId");
runner.assertValid();
}
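// Each top-level JSON field should become a column in a single put keyed by the configured row id.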
@Test
public void testSingleJsonDocAndProvidedRowId() throws IOException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("value1"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
assertEquals(1, events.size());
final ProvenanceEventRecord event = events.get(0);
assertEquals("hbase://" + DEFAULT_TABLE_NAME + "/" + DEFAULT_ROW, event.getTransitUri());
}
@Test
public void testSingleJsonDocAndProvidedRowIdWithNonString() throws IOException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
runner.setProperty(PutHBaseJSON.FIELD_ENCODING_STRATEGY, PutHBaseJSON.BYTES_ENCODING_VALUE);
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
final String content = "{ \"field1\" : 1.23456, \"field2\" : 2345235, \"field3\" : false }";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes(1.23456d));
expectedColumns.put("field2", hBaseClient.toBytes(2345235l));
expectedColumns.put("field3", hBaseClient.toBytes(false));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
assertEquals(1, events.size());
final ProvenanceEventRecord event = events.get(0);
assertEquals("hbase://" + DEFAULT_TABLE_NAME + "/" + DEFAULT_ROW, event.getTransitUri());
}
@Test
public void testSingleJsonDocAndExtractedRowId() throws IOException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_FIELD_NAME, "rowField");
final String content = "{ \"rowField\" : \"myRowId\", \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
// should be a put with row id of myRowId, and rowField shouldn't end up in the columns
final Map<String, byte[]> expectedColumns1 = new HashMap<>();
expectedColumns1.put("field1", hBaseClient.toBytes("value1"));
expectedColumns1.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut("myRowId", DEFAULT_COLUMN_FAMILY, expectedColumns1, puts);
final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
assertEquals(1, events.size());
HBaseTestUtil.verifyEvent(runner.getProvenanceEvents(), "hbase://" + DEFAULT_TABLE_NAME + "/myRowId", ProvenanceEventType.SEND);
}
@Test
public void testSingleJsonDocAndExtractedRowIdMissingField() throws IOException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_FIELD_NAME, "rowField");
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_FAILURE).get(0);
outFile.assertContentEquals(content);
// should be no provenance events
assertEquals(0, runner.getProvenanceEvents().size());
// no puts should have made it to the client
assertEquals(0, hBaseClient.getFlowFilePuts().size());
}
@Test
public void testMultipleJsonDocsRouteToFailure() throws IOException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
final String content1 = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
final String content2 = "{ \"field3\" : \"value3\", \"field4\" : \"value4\" }";
final String content = "[ " + content1 + " , " + content2 + " ]";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_FAILURE).get(0);
outFile.assertContentEquals(content);
// should be no provenance events
assertEquals(0, runner.getProvenanceEvents().size());
// no puts should have made it to the client
assertEquals(0, hBaseClient.getFlowFilePuts().size());
}
@Test
public void testELWithProvidedRowId() throws IOException, InitializationException {
final TestRunner runner = getTestRunner("${hbase.table}", "${hbase.colFamily}", "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, "${hbase.rowId}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "myTable");
attributes.put("hbase.colFamily", "myColFamily");
attributes.put("hbase.rowId", "myRowId");
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes("UTF-8"), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get("myTable");
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("value1"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut("myRowId", "myColFamily", expectedColumns, puts);
final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
assertEquals(1, events.size());
HBaseTestUtil.verifyEvent(runner.getProvenanceEvents(), "hbase://myTable/myRowId", ProvenanceEventType.SEND);
}
@Test
public void testELWithExtractedRowId() throws IOException, InitializationException {
final TestRunner runner = getTestRunner("${hbase.table}", "${hbase.colFamily}", "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_FIELD_NAME, "${hbase.rowField}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "myTable");
attributes.put("hbase.colFamily", "myColFamily");
attributes.put("hbase.rowField", "field1");
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes("UTF-8"), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get("myTable");
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut("value1", "myColFamily", expectedColumns, puts);
final List<ProvenanceEventRecord> events = runner.getProvenanceEvents();
assertEquals(1, events.size());
HBaseTestUtil.verifyEvent(runner.getProvenanceEvents(), "hbase://myTable/value1", ProvenanceEventType.SEND);
}
@Test
public void testNullAndArrayElementsWithWarnStrategy() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_WARN.getValue());
// should route to success because there is at least one valid field
final String content = "{ \"field1\" : [{ \"child_field1\" : \"child_value1\" }], \"field2\" : \"value2\", \"field3\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
// should have skipped field1 and field3
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
}
@Test
public void testNullAndArrayElementsWithIgnoreStrategy() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_IGNORE.getValue());
// should route to success because there is at least one valid field
final String content = "{ \"field1\" : [{ \"child_field1\" : \"child_value1\" }], \"field2\" : \"value2\", \"field3\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
// should have skipped field1 and field3
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
}
@Test
public void testNullAndArrayElementsWithFailureStrategy() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_FAIL.getValue());
// should route to failure because the FAIL strategy rejects documents containing complex or null fields
final String content = "{ \"field1\" : [{ \"child_field1\" : \"child_value1\" }], \"field2\" : \"value2\", \"field3\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_FAILURE).get(0);
outFile.assertContentEquals(content);
// should be no provenance events
assertEquals(0, runner.getProvenanceEvents().size());
// no puts should have made it to the client
assertEquals(0, hBaseClient.getFlowFilePuts().size());
}
@Test
public void testNullAndArrayElementsWithTextStrategy() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_TEXT.getValue());
// should route to success because there is at least one valid field
final String content = "{ \"field1\" : [{ \"child_field1\" : \"child_value1\" }], \"field2\" : \"value2\", \"field3\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
// should have stored field1 as its JSON text and skipped the null field3
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("[{\"child_field1\":\"child_value1\"}]"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
}
@Test
public void testNestedDocWithTextStrategy() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_TEXT.getValue());
// should route to success because there is at least one valid field
final String content = "{ \"field1\" : { \"child_field1\" : \"child_value1\" }, \"field2\" : \"value2\", \"field3\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
// should have stored the nested field1 as its JSON text and skipped the null field3
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("{\"child_field1\":\"child_value1\"}"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, expectedColumns, puts);
}
@Test
public void testAllElementsAreNullOrArrays() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.COMPLEX_FIELD_STRATEGY, PutHBaseJSON.COMPLEX_FIELD_WARN.getValue());
// should route to failure since it would produce a put with no columns
final String content = "{ \"field1\" : [{ \"child_field1\" : \"child_value1\" }], \"field2\" : null }";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_FAILURE).get(0);
outFile.assertContentEquals(content);
// should be no provenance events
assertEquals(0, runner.getProvenanceEvents().size());
// no puts should have made it to the client
assertEquals(0, hBaseClient.getFlowFilePuts().size());
}
@Test
public void testInvalidJson() throws InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
final String content = "NOT JSON";
runner.enqueue(content.getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_FAILURE, 1);
}
@Test
public void testTimestamp() throws UnsupportedEncodingException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.TIMESTAMP, DEFAULT_TIMESTAMP.toString());
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes("UTF-8"));
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("value1"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, DEFAULT_TIMESTAMP, expectedColumns, puts);
}
@Test
public void testTimestampWithEL() throws UnsupportedEncodingException, InitializationException {
final TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "1");
final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
runner.setProperty(PutHBaseJSON.ROW_ID, DEFAULT_ROW);
runner.setProperty(PutHBaseJSON.TIMESTAMP, "${hbase.timestamp}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.timestamp", DEFAULT_TIMESTAMP.toString());
final String content = "{ \"field1\" : \"value1\", \"field2\" : \"value2\" }";
runner.enqueue(content.getBytes("UTF-8"), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
outFile.assertContentEquals(content);
assertNotNull(hBaseClient.getFlowFilePuts());
assertEquals(1, hBaseClient.getFlowFilePuts().size());
final List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(DEFAULT_TABLE_NAME);
assertEquals(1, puts.size());
final Map<String, byte[]> expectedColumns = new HashMap<>();
expectedColumns.put("field1", hBaseClient.toBytes("value1"));
expectedColumns.put("field2", hBaseClient.toBytes("value2"));
HBaseTestUtil.verifyPut(DEFAULT_ROW, DEFAULT_COLUMN_FAMILY, DEFAULT_TIMESTAMP, expectedColumns, puts);
}
private TestRunner getTestRunner(String table, String columnFamily, String batchSize) {
final TestRunner runner = TestRunners.newTestRunner(PutHBaseJSON.class);
runner.setProperty(PutHBaseJSON.TABLE_NAME, table);
runner.setProperty(PutHBaseJSON.COLUMN_FAMILY, columnFamily);
runner.setProperty(PutHBaseJSON.BATCH_SIZE, batchSize);
return runner;
}
}

View File

@ -1,199 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.util.Bytes;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.record.MockRecordParser;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import static org.apache.nifi.hbase.HBaseTestUtil.getHBaseClientService;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestPutHBaseRecord {
public static final String DEFAULT_TABLE_NAME = "nifi";
public static final String DEFAULT_COLUMN_FAMILY = "family1";
private TestRunner getTestRunner(String table, String columnFamily, String batchSize) {
final TestRunner runner = TestRunners.newTestRunner(PutHBaseRecord.class);
runner.enforceReadStreamsClosed(false);
runner.setProperty(PutHBaseJSON.TABLE_NAME, table);
runner.setProperty(PutHBaseJSON.COLUMN_FAMILY, columnFamily);
runner.setProperty(PutHBaseJSON.BATCH_SIZE, batchSize);
return runner;
}
private static final List<Integer> KEYS = Arrays.asList(1, 2, 3, 4);
private static final List<String> NAMES = Arrays.asList("rec1", "rec2", "rec3", "rec4");
private static final List<Long> CODES = Arrays.asList(101L, 102L, 103L, 104L);
private void generateTestData(TestRunner runner) throws IOException {
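// Registers a MockRecordParser as the record reader for PutHBaseRecord and seeds it with four
// (id, name, code) records so every test run produces a known, repeatable set of puts.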
final MockRecordParser parser = new MockRecordParser();
try {
runner.addControllerService("parser", parser);
} catch (InitializationException e) {
throw new IOException(e);
}
runner.enableControllerService(parser);
runner.setProperty(PutHBaseRecord.RECORD_READER_FACTORY, "parser");
runner.setProperty(PutHBaseRecord.DEFAULT_VISIBILITY_STRING, "");
runner.setProperty(PutHBaseRecord.VISIBILITY_RECORD_PATH, "");
parser.addSchemaField("id", RecordFieldType.INT);
parser.addSchemaField("name", RecordFieldType.STRING);
parser.addSchemaField("code", RecordFieldType.LONG);
for (int x = 0; x < KEYS.size(); x++) {
parser.addRecord(KEYS.get(x), NAMES.get(x), CODES.get(x));
}
}
private void basicPutSetup(String encodingStrategy, PutValidator validator) throws Exception {
basicPutSetup(encodingStrategy, validator, "1000", 4);
}
private void basicPutSetup(String encodingStrategy, PutValidator validator, String batchSize, int expectedPuts) throws Exception {
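// Shared harness: configures PutHBaseRecord with the given field encoding strategy and batch size,
// feeds it the four generated records, asserts the mock client received the expected number of
// PutFlowFiles, and hands each name/code column pair to the supplied validator.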
assertEquals(1L, 1L);
TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, batchSize);
runner.setProperty(PutHBaseRecord.ROW_FIELD_NAME, "id");
runner.setProperty(PutHBaseRecord.FIELD_ENCODING_STRATEGY, encodingStrategy);
MockHBaseClientService client = getHBaseClientService(runner);
generateTestData(runner);
runner.enqueue("Test".getBytes("UTF-8")); // This is to coax the processor into reading the data in the reader.l
runner.run();
List<MockFlowFile> results = runner.getFlowFilesForRelationship(PutHBaseRecord.REL_SUCCESS);
assertEquals(1, results.size(), "Wrong count");
assertEquals(client.getFlowFilePuts().get("nifi").size(), expectedPuts, "Wrong number of PutFlowFiles ");
for (PutFlowFile putFlowFile : client.getFlowFilePuts().get("nifi")) {
Iterator<PutColumn> columnIterator = putFlowFile.getColumns().iterator();
PutColumn name = columnIterator.next();
PutColumn code = columnIterator.next();
assertNotNull(name, "Name was null");
assertNotNull(code, "Code was null");
String nFamName = new String(name.getColumnFamily());
String cFamName = new String(code.getColumnFamily());
String nQual = new String(name.getColumnQualifier());
String cQual = new String(code.getColumnQualifier());
assertEquals(nFamName, DEFAULT_COLUMN_FAMILY, "Name column family didn't match");
assertEquals(cFamName, DEFAULT_COLUMN_FAMILY, "Code column family didn't match");
assertEquals(nQual, "name", "Name qualifier didn't match");
assertEquals(cQual, "code", "Code qualifier didn't match");
validator.handle(name, code);
}
}
@Test
public void testByteEncodedPut() throws Exception {
basicPutSetup(PutHBaseRecord.BYTES_ENCODING_VALUE, (PutColumn[] columns) -> {
PutColumn name = columns[0];
PutColumn code = columns[1];
String nameVal = Bytes.toString(name.getBuffer());
Long codeVal = Bytes.toLong(code.getBuffer());
assertTrue(NAMES.contains(nameVal), "Name was not found");
assertTrue(CODES.contains(codeVal), "Code was not found ");
});
}
private void innerTest(PutColumn[] columns) {
PutColumn name = columns[0];
PutColumn code = columns[1];
String nameVal = Bytes.toString(name.getBuffer());
String codeVal = Bytes.toString(code.getBuffer());
assertTrue(NAMES.contains(nameVal), "Name was not found");
assertTrue(CODES.contains(Long.valueOf(codeVal)), "Code was not found ");
}
@Test
public void testStringEncodedPut() throws Exception {
basicPutSetup(PutHBaseRecord.STRING_ENCODING_VALUE, (PutColumn[] columns) -> {
innerTest(columns);
});
}
@Test
public void testBatchOfOne() throws Exception {
basicPutSetup(PutHBaseRecord.STRING_ENCODING_VALUE, (PutColumn[] columns) -> {
innerTest(columns);
}, "1", 1);
}
@Test
public void testBatchOfTwo() throws Exception {
basicPutSetup(PutHBaseRecord.STRING_ENCODING_VALUE, (PutColumn[] columns) -> {
innerTest(columns);
}, "2", 2);
}
@Test
public void testFailure() throws Exception {
assertEquals(1L, 1L);
TestRunner runner = getTestRunner(DEFAULT_TABLE_NAME, DEFAULT_COLUMN_FAMILY, "2");
runner.setProperty(PutHBaseRecord.ROW_FIELD_NAME, "id");
runner.setProperty(PutHBaseRecord.FIELD_ENCODING_STRATEGY, PutHBaseRecord.STRING_ENCODING_VALUE);
MockHBaseClientService client = getHBaseClientService(runner);
client.setTestFailure(true);
client.setFailureThreshold(2);
generateTestData(runner);
runner.enqueue("Test".getBytes("UTF-8")); // This is to coax the processor into reading the data in the reader.
runner.run();
List<MockFlowFile> result = runner.getFlowFilesForRelationship(PutHBaseRecord.REL_FAILURE);
assertEquals(result.size(), 1, "Size was wrong");
assertEquals(client.getFlowFilePuts().get("nifi").size(), 2, "Wrong # of PutFlowFiles");
assertTrue(runner.getFlowFilesForRelationship(PutHBaseRecord.REL_SUCCESS).isEmpty());
MockFlowFile mff = result.get(0);
assertNotNull(mff.getAttribute("restart.index"), "Missing restart index attribute");
List<PutFlowFile> old = client.getFlowFilePuts().get("nifi");
client.setTestFailure(false);
runner.enqueue("test");
runner.run();
assertEquals(result.size(), 1, "Size was wrong");
assertEquals(client.getFlowFilePuts().get("nifi").size(), 2, "Wrong # of PutFlowFiles");
List<PutFlowFile> newPFF = client.getFlowFilePuts().get("nifi");
for (PutFlowFile putFlowFile : old) {
assertFalse(newPFF.contains(putFlowFile), "Duplication");
}
}
interface PutValidator {
void handle(PutColumn... columns);
}
}

View File

@ -1,406 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
public class TestScanHBase {
private ScanHBase proc;
private MockHBaseClientService hBaseClientService;
private TestRunner runner;
@BeforeEach
public void setup() throws InitializationException {
proc = new ScanHBase();
runner = TestRunners.newTestRunner(proc);
hBaseClientService = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClientService);
runner.enableControllerService(hBaseClientService);
runner.setProperty(ScanHBase.HBASE_CLIENT_SERVICE, "hbaseClient");
}
@Test
public void testColumnsValidation() {
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.assertValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1:cq1");
runner.assertValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1");
runner.assertValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1:cq1,cf2:cq2,cf3:cq3");
runner.assertValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1,cf2:cq1,cf3");
runner.assertValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1 cf2,cf3");
runner.assertNotValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1:,cf2,cf3");
runner.assertNotValid();
runner.setProperty(ScanHBase.COLUMNS, "cf1:cq1,");
runner.assertNotValid();
}
@Test
public void testNoIncomingFlowFile() {
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 0);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testInvalidTableName() {
runner.setProperty(ScanHBase.TABLE_NAME, "${hbase.table}");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 1);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 0);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testResultsNotFound() {
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 0);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
MockFlowFile flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_ORIGINAL).get(0);
flowFile.assertAttributeEquals("scanhbase.results.found", Boolean.FALSE.toString());
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testScanToContentWithStringValues() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
hBaseClientService.addResult("row2", cells, ts1);
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row2");
runner.setProperty(ScanHBase.TIME_RANGE_MIN, "0");
runner.setProperty(ScanHBase.TIME_RANGE_MAX, "1111111110");
runner.setProperty(ScanHBase.LIMIT_ROWS, "10");
runner.setProperty(ScanHBase.REVERSED_SCAN, "false");
runner.setProperty(ScanHBase.BULK_SIZE, "10");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 1);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
MockFlowFile flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).get(0);
flowFile.assertContentEquals("[{\"row\":\"row1\", \"cells\": [" +
"{\"fam\":\"nifi\",\"qual\":\"cq1\",\"val\":\"val1\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]},\n"
+ "{\"row\":\"row2\", \"cells\": [" +
"{\"fam\":\"nifi\",\"qual\":\"cq1\",\"val\":\"val1\",\"ts\":" + ts1 + "}, " +
"{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}]");
flowFile.assertAttributeEquals(ScanHBase.HBASE_ROWS_COUNT_ATTR, "2");
flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_ORIGINAL).get(0);
flowFile.assertAttributeEquals("scanhbase.results.found", Boolean.TRUE.toString());
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testScanBulkSize() {
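// 15 matching rows with bulk.size = 10 should be split into two success flow files:
// one carrying 10 rows and a final partial one carrying the remaining 5.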
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
for (int i = 0; i < 15; i++) {
hBaseClientService.addResult("row" + i, cells, System.currentTimeMillis());
}
runner.setProperty(ScanHBase.TABLE_NAME, "${hbase.table}");
runner.setProperty(ScanHBase.START_ROW, "${hbase.row}1");
runner.setProperty(ScanHBase.END_ROW, "${hbase.row}2");
runner.setProperty(ScanHBase.COLUMNS, "${hbase.cols}");
runner.setProperty(ScanHBase.TIME_RANGE_MIN, "${tr_min}");
runner.setProperty(ScanHBase.TIME_RANGE_MAX, "${tr_max}");
runner.setProperty(ScanHBase.LIMIT_ROWS, "${limit}");
runner.setProperty(ScanHBase.BULK_SIZE, "${bulk.size}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "table1");
attributes.put("hbase.row", "row");
attributes.put("hbase.cols", "nifi:cq2");
attributes.put("tr_min", "10000000");
attributes.put("tr_max", "10000001");
attributes.put("limit", "1000");
attributes.put("bulk.size", "10");
runner.enqueue("trigger flow file", attributes);
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 2);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
MockFlowFile flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).get(0);
flowFile.assertAttributeEquals(ScanHBase.HBASE_ROWS_COUNT_ATTR, "10");
flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).get(1);
flowFile.assertAttributeEquals(ScanHBase.HBASE_ROWS_COUNT_ATTR, "5");
}
@Test
public void testScanBatchSizeTimesOfBulkSize() {
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
for (int i = 0; i < 1000; i++) {
hBaseClientService.addResult("row" + i, cells, System.currentTimeMillis());
}
runner.setProperty(ScanHBase.TABLE_NAME, "${hbase.table}");
runner.setProperty(ScanHBase.START_ROW, "${hbase.row}1");
runner.setProperty(ScanHBase.END_ROW, "${hbase.row}2");
runner.setProperty(ScanHBase.COLUMNS, "${hbase.cols}");
runner.setProperty(ScanHBase.TIME_RANGE_MIN, "${tr_min}");
runner.setProperty(ScanHBase.TIME_RANGE_MAX, "${tr_max}");
runner.setProperty(ScanHBase.LIMIT_ROWS, "${limit}");
runner.setProperty(ScanHBase.BULK_SIZE, "${bulk.size}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "table1");
attributes.put("hbase.row", "row");
attributes.put("hbase.cols", "nifi:cq2");
attributes.put("tr_min", "10000000");
attributes.put("tr_max", "10000001");
attributes.put("limit", "1000");
attributes.put("bulk.size", "100");
runner.enqueue("trigger flow file", attributes);
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 10);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).forEach(ff -> {
ff.assertAttributeEquals(ScanHBase.HBASE_ROWS_COUNT_ATTR, "100");
assertNotEquals(0, ff.getId()); // since the total number of rows is a multiple of bulkSize, the original flow file (with id=0) shouldn't appear in the output.
});
}
@Test
public void testScanBatchSizeTimesCutBulkSize() {
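// 1102 matching rows with bulk.size = 110 should yield 11 success flow files:
// ten full flow files of 110 rows each and a final one carrying the remaining 2 rows.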
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
for (int i = 0; i < 1102; i++) {
hBaseClientService.addResult("row" + i, cells, System.currentTimeMillis());
}
runner.setProperty(ScanHBase.TABLE_NAME, "${hbase.table}");
runner.setProperty(ScanHBase.START_ROW, "${hbase.row}1");
runner.setProperty(ScanHBase.END_ROW, "${hbase.row}2");
runner.setProperty(ScanHBase.COLUMNS, "${hbase.cols}");
runner.setProperty(ScanHBase.TIME_RANGE_MIN, "${tr_min}");
runner.setProperty(ScanHBase.TIME_RANGE_MAX, "${tr_max}");
runner.setProperty(ScanHBase.LIMIT_ROWS, "${limit}");
runner.setProperty(ScanHBase.BULK_SIZE, "${bulk.size}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "table1");
attributes.put("hbase.row", "row");
attributes.put("hbase.cols", "nifi:cq2");
attributes.put("tr_min", "10000000");
attributes.put("tr_max", "10000001");
attributes.put("limit", "1000");
attributes.put("bulk.size", "110");
runner.enqueue("trigger flow file", attributes);
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 11);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
List<MockFlowFile> ffs = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS);
int i = 0;
for (MockFlowFile ff : ffs) {
ff.assertAttributeEquals(ScanHBase.HBASE_ROWS_COUNT_ATTR, i++ < 10 ? "110" : "2"); // the last flow file should have only 2 rows
}
}
@Test
public void testScanToContentWithQualifierAndValueJSON() {
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
hBaseClientService.addResult("row1", cells, System.currentTimeMillis());
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.setProperty(ScanHBase.JSON_FORMAT, ScanHBase.JSON_FORMAT_QUALIFIER_AND_VALUE);
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 1);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).get(0);
flowFile.assertContentEquals("[{\"cq1\":\"val1\", \"cq2\":\"val2\"}]");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testScanWithExpressionLanguage() {
final Map<String, String> cells = new HashMap<>();
// cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
runner.setProperty(ScanHBase.TABLE_NAME, "${hbase.table}");
runner.setProperty(ScanHBase.START_ROW, "${hbase.row}1");
runner.setProperty(ScanHBase.END_ROW, "${hbase.row}2");
runner.setProperty(ScanHBase.COLUMNS, "${hbase.cols}");
runner.setProperty(ScanHBase.TIME_RANGE_MIN, "${tr_min}");
runner.setProperty(ScanHBase.TIME_RANGE_MAX, "${tr_max}");
runner.setProperty(ScanHBase.LIMIT_ROWS, "${limit}");
runner.setProperty(ScanHBase.BULK_SIZE, "${bulk.size}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("hbase.table", "table1");
attributes.put("hbase.row", "row");
attributes.put("hbase.cols", "nifi:cq2");
attributes.put("tr_min", "10000000");
attributes.put("tr_max", "10000001");
attributes.put("limit", "1000");
attributes.put("bulk.size", "10");
runner.enqueue("trigger flow file", attributes);
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 0);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 1);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(ScanHBase.REL_SUCCESS).get(0);
flowFile.assertContentEquals("[{\"row\":\"row1\", \"cells\": [{\"fam\":\"nifi\",\"qual\":\"cq2\",\"val\":\"val2\",\"ts\":" + ts1 + "}]}]");
assertEquals(1, hBaseClientService.getNumScans());
}
@Test
public void testScanWhenScanThrowsException() {
hBaseClientService.setThrowException(true);
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row1");
runner.enqueue("trigger flow file");
runner.run();
runner.assertTransferCount(ScanHBase.REL_FAILURE, 1);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 0);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
@Test
public void testScanWhenScanThrowsExceptionAfterLineN() {
hBaseClientService.setLinesBeforeException(1);
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "val1");
cells.put("cq2", "val2");
final long ts1 = 123456789;
hBaseClientService.addResult("row1", cells, ts1);
hBaseClientService.addResult("row2", cells, ts1);
runner.setProperty(ScanHBase.TABLE_NAME, "table1");
runner.setProperty(ScanHBase.START_ROW, "row1");
runner.setProperty(ScanHBase.END_ROW, "row2");
runner.enqueue("trigger flow file");
runner.run();
hBaseClientService.setLinesBeforeException(-1);
runner.assertTransferCount(ScanHBase.REL_FAILURE, 1);
runner.assertTransferCount(ScanHBase.REL_SUCCESS, 0);
runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 0);
assertEquals(0, hBaseClientService.getNumScans());
}
}

View File

@ -1,130 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.hbase.util.VisibilityUtil;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
public class TestVisibilityUtil {
private TestRunner runner;
@BeforeEach
public void setup() throws Exception {
runner = TestRunners.newTestRunner(PutHBaseCell.class);
final MockHBaseClientService hBaseClient = new MockHBaseClientService();
runner.addControllerService("hbaseClient", hBaseClient);
runner.enableControllerService(hBaseClient);
runner.setProperty(PutHBaseCell.HBASE_CLIENT_SERVICE, "hbaseClient");
runner.setProperty(PutHBaseCell.TABLE_NAME, "test");
runner.setProperty(PutHBaseCell.COLUMN_QUALIFIER, "test");
runner.setProperty(PutHBaseCell.COLUMN_FAMILY, "test");
runner.assertValid();
}
@Test
public void testAllPresentOnFlowfile() {
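// When both the processor property and the flow file attribute define visibility.test.test,
// the flow file attribute value is expected to win.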
runner.setProperty("visibility.test.test", "U&PII");
MockFlowFile ff = new MockFlowFile(System.currentTimeMillis());
ff.putAttributes(new HashMap<String, String>() {{
put("visibility.test.test", "U&PII&PHI");
}});
ProcessContext context = runner.getProcessContext();
String label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII&PHI", label);
}
@Test
public void testOnlyColumnFamilyOnFlowfile() {
runner.setProperty("visibility.test", "U&PII");
MockFlowFile ff = new MockFlowFile(System.currentTimeMillis());
ff.putAttributes(new HashMap<String, String>() {{
put("visibility.test", "U&PII&PHI");
}});
ProcessContext context = runner.getProcessContext();
String label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII&PHI", label);
}
@Test
public void testInvalidAttributes() {
runner.setProperty("visibility.test", "U&PII");
MockFlowFile ff = new MockFlowFile(System.currentTimeMillis());
ff.putAttributes(new HashMap<String, String>() {{
put("visibility..test", "U&PII&PHI");
}});
ProcessContext context = runner.getProcessContext();
String label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII", label);
}
@Test
public void testColumnFamilyAttributeOnly() {
MockFlowFile ff = new MockFlowFile(System.currentTimeMillis());
ff.putAttributes(new HashMap<String, String>() {{
put("visibility.test", "U&PII");
}});
ProcessContext context = runner.getProcessContext();
String label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII", label);
}
@Test
public void testNoAttributes() {
runner.setProperty("visibility.test", "U&PII");
MockFlowFile ff = new MockFlowFile(System.currentTimeMillis());
ProcessContext context = runner.getProcessContext();
String label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII", label);
runner.setProperty("visibility.test.test", "U&PII&PHI");
label = VisibilityUtil.pickVisibilityString("test", "test", ff, context);
assertNotNull(label);
assertEquals("U&PII&PHI", label);
}
}

View File

@ -1,117 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.commons.codec.binary.Base64;
import org.apache.nifi.hbase.scan.ResultCell;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestJsonFullRowSerializer {
static final String ROW = "row1";
static final String FAM1 = "colFam1";
static final String QUAL1 = "colQual1";
static final String VAL1 = "val1";
static final long TS1 = 1111111111;
static final String FAM2 = "colFam2";
static final String QUAL2 = "colQual2";
static final String VAL2 = "val2";
static final long TS2 = 222222222;
private final byte[] rowKey = ROW.getBytes(StandardCharsets.UTF_8);
private ResultCell[] cells;
@BeforeEach
public void setup() {
final byte[] cell1Fam = FAM1.getBytes(StandardCharsets.UTF_8);
final byte[] cell1Qual = QUAL1.getBytes(StandardCharsets.UTF_8);
final byte[] cell1Val = VAL1.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Fam = FAM2.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Qual = QUAL2.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Val = VAL2.getBytes(StandardCharsets.UTF_8);
final ResultCell cell1 = getResultCell(cell1Fam, cell1Qual, cell1Val, TS1);
final ResultCell cell2 = getResultCell(cell2Fam, cell2Qual, cell2Val, TS2);
cells = new ResultCell[] {cell1, cell2};
}
@Test
public void testSerializeRegular() throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final RowSerializer rowSerializer = new JsonFullRowSerializer(StandardCharsets.UTF_8, StandardCharsets.UTF_8);
rowSerializer.serialize(rowKey, cells, out);
final String json = out.toString(StandardCharsets.UTF_8.name());
assertEquals("{\"row\":\"row1\", \"cells\": [" +
"{\"fam\":\"" + FAM1 + "\",\"qual\":\"" + QUAL1 + "\",\"val\":\"" + VAL1 + "\",\"ts\":" + TS1 + "}, " +
"{\"fam\":\"" + FAM2 + "\",\"qual\":\"" + QUAL2 + "\",\"val\":\"" + VAL2 + "\",\"ts\":" + TS2 + "}]}",
json);
}
@Test
public void testSerializeWithBase64() throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final RowSerializer rowSerializer = new JsonFullRowSerializer(StandardCharsets.UTF_8, StandardCharsets.UTF_8, true);
rowSerializer.serialize(rowKey, cells, out);
final String rowBase64 = Base64.encodeBase64String(ROW.getBytes(StandardCharsets.UTF_8));
final String fam1Base64 = Base64.encodeBase64String(FAM1.getBytes(StandardCharsets.UTF_8));
final String qual1Base64 = Base64.encodeBase64String(QUAL1.getBytes(StandardCharsets.UTF_8));
final String val1Base64 = Base64.encodeBase64String(VAL1.getBytes(StandardCharsets.UTF_8));
final String fam2Base64 = Base64.encodeBase64String(FAM2.getBytes(StandardCharsets.UTF_8));
final String qual2Base64 = Base64.encodeBase64String(QUAL2.getBytes(StandardCharsets.UTF_8));
final String val2Base64 = Base64.encodeBase64String(VAL2.getBytes(StandardCharsets.UTF_8));
final String json = out.toString(StandardCharsets.UTF_8.name());
assertEquals("{\"row\":\"" + rowBase64 + "\", \"cells\": [" +
"{\"fam\":\"" + fam1Base64 + "\",\"qual\":\"" + qual1Base64 + "\",\"val\":\"" + val1Base64 + "\",\"ts\":" + TS1 + "}, " +
"{\"fam\":\"" + fam2Base64 + "\",\"qual\":\"" + qual2Base64 + "\",\"val\":\"" + val2Base64 + "\",\"ts\":" + TS2 + "}]}", json);
}
private ResultCell getResultCell(byte[] fam, byte[] qual, byte[] val, long timestamp) {
final ResultCell cell = new ResultCell();
cell.setFamilyArray(fam);
cell.setFamilyOffset(0);
cell.setFamilyLength((byte) fam.length);
cell.setQualifierArray(qual);
cell.setQualifierOffset(0);
cell.setQualifierLength(qual.length);
cell.setValueArray(val);
cell.setValueOffset(0);
cell.setValueLength(val.length);
cell.setTimestamp(timestamp);
return cell;
}
}

View File

@ -1,109 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.commons.codec.binary.Base64;
import org.apache.nifi.hbase.scan.ResultCell;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestJsonQualifierAndValueRowSerializer {
static final String ROW = "row1";
static final String FAM1 = "colFam1";
static final String QUAL1 = "colQual1";
static final String VAL1 = "val1";
static final long TS1 = 1111111111;
static final String FAM2 = "colFam2";
static final String QUAL2 = "colQual2";
static final String VAL2 = "val2";
static final long TS2 = 222222222;
private final byte[] rowKey = ROW.getBytes(StandardCharsets.UTF_8);
private ResultCell[] cells;
@BeforeEach
public void setup() {
final byte[] cell1Fam = FAM1.getBytes(StandardCharsets.UTF_8);
final byte[] cell1Qual = QUAL1.getBytes(StandardCharsets.UTF_8);
final byte[] cell1Val = VAL1.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Fam = FAM2.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Qual = QUAL2.getBytes(StandardCharsets.UTF_8);
final byte[] cell2Val = VAL2.getBytes(StandardCharsets.UTF_8);
final ResultCell cell1 = getResultCell(cell1Fam, cell1Qual, cell1Val, TS1);
final ResultCell cell2 = getResultCell(cell2Fam, cell2Qual, cell2Val, TS2);
cells = new ResultCell[] {cell1, cell2};
}
@Test
public void testSerializeRegular() throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final RowSerializer rowSerializer = new JsonQualifierAndValueRowSerializer(StandardCharsets.UTF_8, StandardCharsets.UTF_8);
rowSerializer.serialize(rowKey, cells, out);
final String json = out.toString(StandardCharsets.UTF_8.name());
assertEquals("{\"" + QUAL1 + "\":\"" + VAL1 + "\", \"" + QUAL2 + "\":\"" + VAL2 + "\"}", json);
}
@Test
public void testSerializeWithBase64() throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final RowSerializer rowSerializer = new JsonQualifierAndValueRowSerializer(StandardCharsets.UTF_8, StandardCharsets.UTF_8, true);
rowSerializer.serialize(rowKey, cells, out);
final String qual1Base64 = Base64.encodeBase64String(QUAL1.getBytes(StandardCharsets.UTF_8));
final String val1Base64 = Base64.encodeBase64String(VAL1.getBytes(StandardCharsets.UTF_8));
final String qual2Base64 = Base64.encodeBase64String(QUAL2.getBytes(StandardCharsets.UTF_8));
final String val2Base64 = Base64.encodeBase64String(VAL2.getBytes(StandardCharsets.UTF_8));
final String json = out.toString(StandardCharsets.UTF_8.name());
assertEquals("{\"" + qual1Base64 + "\":\"" + val1Base64 + "\", \"" + qual2Base64 + "\":\"" + val2Base64 + "\"}", json);
}
private ResultCell getResultCell(byte[] fam, byte[] qual, byte[] val, long timestamp) {
final ResultCell cell = new ResultCell();
cell.setFamilyArray(fam);
cell.setFamilyOffset(0);
cell.setFamilyLength((byte) fam.length);
cell.setQualifierArray(qual);
cell.setQualifierOffset(0);
cell.setQualifierLength(qual.length);
cell.setValueArray(val);
cell.setValueOffset(0);
cell.setValueLength(val.length);
cell.setTimestamp(timestamp);
return cell;
}
}

View File

@ -1,78 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.io;
import org.apache.nifi.hbase.scan.ResultCell;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestJsonRowSerializer {
private final byte[] rowKey = "row1".getBytes(StandardCharsets.UTF_8);
private ResultCell[] cells;
@BeforeEach
public void setup() {
final byte[] cell1Fam = "colFam1".getBytes(StandardCharsets.UTF_8);
final byte[] cell1Qual = "colQual1".getBytes(StandardCharsets.UTF_8);
final byte[] cell1Val = "val1".getBytes(StandardCharsets.UTF_8);
final byte[] cell2Fam = "colFam2".getBytes(StandardCharsets.UTF_8);
final byte[] cell2Qual = "colQual2".getBytes(StandardCharsets.UTF_8);
final byte[] cell2Val = "val2".getBytes(StandardCharsets.UTF_8);
final ResultCell cell1 = getResultCell(cell1Fam, cell1Qual, cell1Val);
final ResultCell cell2 = getResultCell(cell2Fam, cell2Qual, cell2Val);
cells = new ResultCell[] {cell1, cell2};
}
@Test
public void testSerializeRegular() throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final RowSerializer rowSerializer = new JsonRowSerializer(StandardCharsets.UTF_8);
rowSerializer.serialize(rowKey, cells, out);
final String json = out.toString(StandardCharsets.UTF_8.name());
assertEquals("{\"row\":\"row1\", \"cells\": {\"colFam1:colQual1\":\"val1\", \"colFam2:colQual2\":\"val2\"}}", json);
}
private ResultCell getResultCell(byte[] fam, byte[] qual, byte[] val) {
final ResultCell cell = new ResultCell();
cell.setFamilyArray(fam);
cell.setFamilyOffset(0);
cell.setFamilyLength((byte) fam.length);
cell.setQualifierArray(qual);
cell.setQualifierOffset(0);
cell.setQualifierLength(qual.length);
cell.setValueArray(val);
cell.setValueOffset(0);
cell.setValueLength(val.length);
return cell;
}
}

View File

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import java.nio.charset.Charset;
public class Bytes {
public static String toString(byte[] b) {
return b == null ? null : toString(b, 0, b.length);
}
public static String toString(byte[] b1, String sep, byte[] b2) {
return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
}
public static String toString(byte[] b, int off, int len) {
if (b == null) {
return null;
} else {
return len == 0 ? "" : new String(b, off, len, Charset.forName("UTF-8"));
}
}
public static long toLong(byte[] bytes) {
return toLong(bytes, 0, 8);
}
private static long toLong(byte[] bytes, int offset, int length) {
if (length == 8 && offset + length <= bytes.length) {
long l = 0L;
for (int i = offset; i < offset + length; ++i) {
l <<= 8;
l ^= bytes[i] & 255;
}
return l;
} else {
throw explainWrongLengthOrOffset(bytes, offset, length, 8);
}
}
private static IllegalArgumentException explainWrongLengthOrOffset(byte[] bytes, int offset, int length, int expectedLength) {
String reason;
if (length != expectedLength) {
reason = "Wrong length: " + length + ", expected " + expectedLength;
} else {
reason = "offset (" + offset + ") + length (" + length + ") exceed the" + " capacity of the array: " + bytes.length;
}
return new IllegalArgumentException(reason);
}
}
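A minimal usage sketch of the helper above; the values are illustrative and not taken from the removed tests, and java.nio.charset.StandardCharsets is assumed to be imported:
byte[] eight = new byte[] {0, 0, 0, 0, 0, 0, 0, 42};
long number = Bytes.toLong(eight); // 42L
String text = Bytes.toString("val1".getBytes(StandardCharsets.UTF_8)); // "val1"
// Passing an array that cannot supply exactly 8 bytes to toLong throws IllegalArgumentException.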

View File

@ -1,75 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.util;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestObjectSerDe {
@Test
public void testDeserializeSuccessful() throws IOException {
final ObjectSerDe serDe = new ObjectSerDe();
final String myObject = "myObject";
final ByteArrayOutputStream bOut = new ByteArrayOutputStream();
final ObjectOutputStream out = new ObjectOutputStream(bOut);
out.writeObject(myObject);
byte[] myObjectBytes = bOut.toByteArray();
assertNotNull(myObjectBytes);
assertTrue(myObjectBytes.length > 0);
final Object deserialized = serDe.deserialize(myObjectBytes);
assertTrue(deserialized instanceof String);
assertEquals(myObject, deserialized);
}
@Test
public void testDeserializeNull() throws IOException {
final ObjectSerDe serDe = new ObjectSerDe();
final Object deserialized = serDe.deserialize(null);
assertNull(deserialized);
}
@Test
public void testSerialize() throws IOException, ClassNotFoundException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final String myObject = "myObject";
final ObjectSerDe serDe = new ObjectSerDe();
serDe.serialize(myObject, out);
final ByteArrayInputStream bIn = new ByteArrayInputStream(out.toByteArray());
final ObjectInputStream in = new ObjectInputStream(bIn);
final Object deserialized = in.readObject();
assertTrue(deserialized instanceof String);
assertEquals(myObject, deserialized);
}
}

View File

@ -1,31 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-shared-bom</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../nifi-standard-shared-bundle/nifi-standard-shared-bom</relativePath>
</parent>
<artifactId>nifi-hbase-bundle</artifactId>
<packaging>pom</packaging>
<modules>
<module>nifi-hbase-processors</module>
<module>nifi-hbase-nar</module>
</modules>
</project>

View File

@ -78,12 +78,6 @@
<version>2.0.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-client-service-api</artifactId>
<version>2.0.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-oauth2-provider-api</artifactId>

View File

@ -1,25 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-services</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-hbase-client-service-api</artifactId>
<packaging>jar</packaging>
</project>

View File

@ -1,60 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
/**
* Encapsulates the information for a delete operation.
*/
public class DeleteRequest {
private byte[] rowId;
private byte[] columnFamily;
private byte[] columnQualifier;
private String visibilityLabel;
public DeleteRequest(byte[] rowId, byte[] columnFamily, byte[] columnQualifier, String visibilityLabel) {
this.rowId = rowId;
this.columnFamily = columnFamily;
this.columnQualifier = columnQualifier;
this.visibilityLabel = visibilityLabel;
}
public byte[] getRowId() {
return rowId;
}
public byte[] getColumnFamily() {
return columnFamily;
}
public byte[] getColumnQualifier() {
return columnQualifier;
}
public String getVisibilityLabel() {
return visibilityLabel;
}
@Override
public String toString() {
return new StringBuilder()
.append(String.format("Row ID: %s\n", new String(rowId)))
.append(String.format("Column Family: %s\n", new String(columnFamily)))
.append(String.format("Column Qualifier: %s\n", new String(columnQualifier)))
.append(visibilityLabel != null ? String.format("Visibility Label: %s", visibilityLabel) : "")
.toString();
}
}
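A brief illustrative sketch of building a request for a granular delete; the row, family, qualifier, and label values are hypothetical:
DeleteRequest request = new DeleteRequest(
        "row1".getBytes(StandardCharsets.UTF_8),
        "family1".getBytes(StandardCharsets.UTF_8),
        "qualifier1".getBytes(StandardCharsets.UTF_8),
        "U&PII");
// A null visibilityLabel is allowed; toString() then simply omits the label line.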

View File

@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
public class HBaseClientException extends Exception {
public HBaseClientException(final String message) {
super(message);
}
public HBaseClientException(final Throwable cause) {
super(cause);
}
}

View File

@ -1,247 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.controller.ControllerService;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.HBaseRegion;
import org.apache.nifi.hbase.scan.ResultHandler;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
@Tags({"hbase", "client"})
@CapabilityDescription("A controller service for accessing an HBase client.")
public interface HBaseClientService extends ControllerService {
/**
* Puts a batch of mutations to the given table.
*
* @param tableName the name of an HBase table
* @param puts a list of put mutations for the given table
* @throws IOException thrown when there are communication errors with HBase
*/
void put(String tableName, Collection<PutFlowFile> puts) throws IOException;
/**
* Puts the given row to HBase with the provided columns.
*
* @param tableName the name of an HBase table
* @param rowId the id of the row to put
* @param columns the columns of the row to put
* @throws IOException thrown when there are communication errors with HBase
*/
void put(String tableName, byte[] rowId, Collection<PutColumn> columns) throws IOException;
/**
* Atomically checks if a row/family/qualifier value matches the expected value. If it does, then the Put is added to HBase.
*
* @param tableName the name of an HBase table
* @param rowId the id of the row to check
* @param family the family of the row to check
* @param qualifier the qualifier of the row to check
 * @param value the value of the row to check. If null, the check is for the lack of the column (i.e. non-existence)
 * @param column the column to put if the check succeeds
 * @return True if the Put was executed, false otherwise
 * @throws IOException thrown when there are communication errors with HBase
*/
boolean checkAndPut(String tableName, byte[] rowId, byte[] family, byte[] qualifier, byte[] value, PutColumn column) throws IOException;
/**
* Deletes the given row on HBase. All cells are deleted.
*
* @param tableName the name of an HBase table
* @param rowId the id of the row to delete
* @throws IOException thrown when there are communication errors with HBase
*/
void delete(String tableName, byte[] rowId) throws IOException;
/**
* Deletes the given row on HBase. Uses the supplied visibility label for all cells in the delete.
* It will fail if HBase cannot delete a cell because the visibility label on the cell does not match the specified
* label.
*
* @param tableName the name of an HBase table
* @param rowId the id of the row to delete
* @param visibilityLabel a visibility label to apply to the delete
* @throws IOException thrown when there are communication errors with HBase
*/
void delete(String tableName, byte[] rowId, String visibilityLabel) throws IOException;
/**
* Deletes a list of rows in HBase. All cells are deleted.
*
* @param tableName the name of an HBase table
* @param rowIds a list of rowIds to send in a batch delete
*/
void delete(String tableName, List<byte[]> rowIds) throws IOException;
/**
* Deletes a list of cells from HBase. This is intended to be used with granular delete operations.
*
* @param tableName the name of an HBase table.
* @param deletes a list of DeleteRequest objects.
* @throws IOException thrown when there are communication errors with HBase
*/
void deleteCells(String tableName, List<DeleteRequest> deletes) throws IOException;
/**
* Deletes a list of rows in HBase. All cells that match the visibility label are deleted.
*
* @param tableName the name of an HBase table
* @param rowIds a list of rowIds to send in a batch delete
* @param visibilityLabel a visibility label expression
*/
void delete(String tableName, List<byte[]> rowIds, String visibilityLabel) throws IOException;
/**
* Scans the given table using the optional filter criteria and passing each result to the provided handler.
*
* @param tableName the name of an HBase table to scan
* @param columns optional columns to return, if not specified all columns are returned
* @param filterExpression optional filter expression, if not specified no filtering is performed
* @param minTime the minimum timestamp of cells to return, passed to the HBase scanner timeRange
* @param handler a handler to process rows of the result set
* @throws IOException thrown when there are communication errors with HBase
*/
void scan(String tableName, Collection<Column> columns, String filterExpression, long minTime, ResultHandler handler) throws IOException;
/**
* Scans the given table using the optional filter criteria, passing each result to the provided handler.
*
* @param tableName the name of an HBase table to scan
* @param columns optional columns to return, if not specified all columns are returned
* @param filterExpression optional filter expression, if not specified no filtering is performed
* @param minTime the minimum timestamp of cells to return, passed to the HBase scanner timeRange
* @param authorizations the visibility labels to apply to the scanner.
* @param handler a handler to process rows of the result set
* @throws IOException thrown when there are communication errors with HBase
*/
void scan(String tableName, Collection<Column> columns, String filterExpression, long minTime, List<String> authorizations, ResultHandler handler) throws IOException;
/**
* Scans the given table over the given range of row identifiers and passes each result to the handler.
*
* @param tableName the name of an HBase table to scan
* @param startRow the row identifier to start scanning at
* @param endRow the row identifier to end scanning at
* @param columns optional columns to return, if not specified all columns are returned
* @param authorizations optional list of visibility labels that the user should be able to see when communicating with HBase
* @param handler a handler to process rows of the result
* @throws IOException thrown when there are communication errors with HBase
*/
void scan(String tableName, byte[] startRow, byte[] endRow, Collection<Column> columns, List<String> authorizations, ResultHandler handler) throws IOException;
/**
* Scans the given table for the given range of row keys or time range and passes the result to a handler.<br/>
*
* @param tableName the name of an HBase table to scan
* @param startRow the row identifier to start scanning at
* @param endRow the row identifier to end scanning at
* @param filterExpression optional filter expression, if not specified no filtering is performed
* @param timerangeMin the minimum timestamp of cells to return, passed to the HBase scanner timeRange
* @param timerangeMax the maximum timestamp of cells to return, passed to the HBase scanner timeRange
* @param limitRows the maximum number of rows to be returned by the scanner
* @param isReversed whether this scan is a reversed one
* @param blockCache whether to use the block cache option of the HBase scan
* @param columns optional columns to return, if not specified all columns are returned
* @param authorizations optional list of visibility labels that the user should be able to see when communicating with HBase
* @param handler a handler to process rows of the result
*/
void scan(String tableName, String startRow, String endRow, String filterExpression, Long timerangeMin, Long timerangeMax, Integer limitRows,
Boolean isReversed, Boolean blockCache, Collection<Column> columns, List<String> authorizations, ResultHandler handler) throws IOException;
/**
* Returns a {@link List} of {@link HBaseRegion} objects describing all regions of the given HBase table.
*
* @param tableName the name of the HBase table to fetch region information for
* @return the regions of the given table
* @throws HBaseClientException thrown when the region information cannot be retrieved
*/
List<HBaseRegion> listHBaseRegions(String tableName) throws HBaseClientException;
/**
* Converts the given boolean to its byte representation.
*
* @param b a boolean
* @return the boolean represented as bytes
*/
byte[] toBytes(boolean b);
/**
* Converts the given float to its byte representation.
*
* @param f a float
* @return the float represented as bytes
*/
byte[] toBytes(float f);
/**
* Converts the given int to its byte representation.
*
* @param i an int
* @return the int represented as bytes
*/
byte[] toBytes(int i);
/**
* Converts the given long to its byte representation.
*
* @param l a long
* @return the long represented as bytes
*/
byte[] toBytes(long l);
/**
* Converts the given double to its byte representation.
*
* @param d a double
* @return the double represented as bytes
*/
byte[] toBytes(double d);
/**
* Converts the given string to its byte representation.
*
* @param s a string
* @return the string represented as bytes
*/
byte[] toBytes(String s);
/**
* Converts the given binary formatted string to a byte representation
* @param s a binary encoded string
* @return the string represented as bytes
*/
byte[] toBytesBinary(String s);
/**
* Create a transit URI from the current configuration and the specified table name.
* The default implementation simply prepends "hbase://" to the table name and row key, i.e. "hbase://tableName/rowKey".
* @param tableName the name of an HBase table
* @param rowKey the target HBase row key; this can be null or an empty string if the operation does not target a specific row
* @return a qualified transit URI which can identify an HBase table row in an HBase cluster
*/
default String toTransitUri(String tableName, String rowKey) {
return "hbase://" + tableName + (rowKey != null && !rowKey.isEmpty() ? "/" + rowKey : "");
}
}
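As a quick illustration of the contract documented above, a hypothetical caller might perform a put followed by a scan as in the sketch below; the client instance, table name, and cell values are assumptions for illustration only and were not part of the removed module.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import org.apache.nifi.hbase.HBaseClientService;
import org.apache.nifi.hbase.put.PutColumn;

// Hypothetical caller of the removed HBaseClientService interface.
class HBaseClientServiceUsageSketch {

    void putAndScan(final HBaseClientService client) throws IOException {
        // Write a single cell: family "cf", qualifier "name", value "alice".
        final byte[] rowId = client.toBytes("row-1");
        client.put("users", rowId, List.of(
                new PutColumn(client.toBytes("cf"), client.toBytes("name"), client.toBytes("alice"))));

        // An empty column collection is assumed to mean "return all columns", per the scan(...) Javadoc above.
        client.scan("users", Collections.emptyList(), null, 0L, (row, cells) ->
                System.out.println("row=" + new String(row, StandardCharsets.UTF_8)));
    }
}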

View File

@ -1,71 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.put;
/**
* Encapsulates the information for one column of a put operation.
*/
public class PutColumn {
private final byte[] columnFamily;
private final byte[] columnQualifier;
private final byte[] buffer;
private final String visibility;
private final Long timestamp;
public PutColumn(final byte[] columnFamily, final byte[] columnQualifier, final byte[] buffer) {
this(columnFamily, columnQualifier, buffer, null, null);
}
public PutColumn(final byte[] columnFamily, final byte[] columnQualifier, final byte[] buffer, final String visibility) {
this(columnFamily, columnQualifier, buffer, null, visibility);
}
public PutColumn(final byte[] columnFamily, final byte[] columnQualifier, final byte[] buffer, final Long timestamp) {
this(columnFamily, columnQualifier, buffer, timestamp, null);
}
public PutColumn(final byte[] columnFamily, final byte[] columnQualifier, final byte[] buffer, final Long timestamp, final String visibility) {
this.columnFamily = columnFamily;
this.columnQualifier = columnQualifier;
this.buffer = buffer;
this.timestamp = timestamp;
this.visibility = (visibility != null && visibility.trim().length() > 0) ? visibility : null;
}
public byte[] getColumnFamily() {
return columnFamily;
}
public byte[] getColumnQualifier() {
return columnQualifier;
}
public byte[] getBuffer() {
return buffer;
}
public String getVisibility() {
return visibility;
}
public Long getTimestamp() {
return timestamp;
}
}
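The overloaded constructors above cover plain cells, cells with an explicit timestamp, and cells carrying a visibility label. A minimal sketch of each variant, with invented family, qualifier, value, timestamp, and label literals:

import java.nio.charset.StandardCharsets;

import org.apache.nifi.hbase.put.PutColumn;

class PutColumnSketch {

    static void variants() {
        final byte[] family = "cf".getBytes(StandardCharsets.UTF_8);
        final byte[] qualifier = "name".getBytes(StandardCharsets.UTF_8);
        final byte[] value = "alice".getBytes(StandardCharsets.UTF_8);

        // No explicit timestamp or visibility label.
        final PutColumn plain = new PutColumn(family, qualifier, value);
        // Explicit cell timestamp in milliseconds since the epoch.
        final PutColumn timestamped = new PutColumn(family, qualifier, value, 1700000000000L);
        // Explicit timestamp plus a visibility label expression.
        final PutColumn labelled = new PutColumn(family, qualifier, value, 1700000000000L, "PII&ADMIN");
    }
}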

View File

@ -1,82 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.put;
import org.apache.nifi.flowfile.FlowFile;
import java.util.Arrays;
import java.util.Collection;
/**
* Wrapper to encapsulate all of the information for the Put along with the FlowFile.
*/
public class PutFlowFile {
private final String tableName;
private final byte[] row;
private final Collection<PutColumn> columns;
private final FlowFile flowFile;
public PutFlowFile(String tableName, byte[] row, Collection<PutColumn> columns, FlowFile flowFile) {
this.tableName = tableName;
this.row = row;
this.columns = columns;
this.flowFile = flowFile;
}
public String getTableName() {
return tableName;
}
public byte[] getRow() {
return row;
}
public Collection<PutColumn> getColumns() {
return columns;
}
public FlowFile getFlowFile() {
return flowFile;
}
public boolean isValid() {
if (tableName == null || tableName.trim().isEmpty() || null == row || flowFile == null || columns == null || columns.isEmpty()) {
return false;
}
for (PutColumn column : columns) {
if (null == column.getColumnQualifier() || null == column.getColumnFamily() || column.getBuffer() == null) {
return false;
}
}
return true;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof PutFlowFile) {
PutFlowFile pff = (PutFlowFile) obj;
return this.tableName.equals(pff.tableName)
&& Arrays.equals(this.row, pff.row) // compare row key contents rather than array references
&& this.columns.equals(pff.columns)
&& this.flowFile.equals(pff.flowFile);
} else {
return false;
}
}
}
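PutFlowFile ties a prepared put to the FlowFile it originated from, and isValid() is the natural guard before handing it to the client service. A minimal sketch, assuming the FlowFile is supplied by the calling processor and using invented table and cell values:

import java.nio.charset.StandardCharsets;
import java.util.List;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;

class PutFlowFileSketch {

    static PutFlowFile toPut(final FlowFile flowFile) {
        final byte[] row = "row-1".getBytes(StandardCharsets.UTF_8);
        final PutColumn column = new PutColumn(
                "cf".getBytes(StandardCharsets.UTF_8),
                "name".getBytes(StandardCharsets.UTF_8),
                "alice".getBytes(StandardCharsets.UTF_8));

        final PutFlowFile put = new PutFlowFile("users", row, List.of(column), flowFile);
        if (!put.isValid()) {
            // Missing table name, row, FlowFile, or an incomplete column.
            throw new IllegalStateException("PutFlowFile is not valid");
        }
        return put;
    }
}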

View File

@ -1,71 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.scan;
import java.util.Arrays;
/**
* Wrapper to encapsulate a column family and qualifier.
*/
public class Column {
private final byte[] family;
private final byte[] qualifier;
public Column(byte[] family, byte[] qualifier) {
this.family = family;
this.qualifier = qualifier;
}
public byte[] getFamily() {
return family;
}
public byte[] getQualifier() {
return qualifier;
}
@Override
public boolean equals(Object obj) {
if (!(obj instanceof Column)) {
return false;
}
final Column other = (Column) obj;
return ((this.family == null && other.family == null)
|| (this.family != null && other.family != null && Arrays.equals(this.family, other.family)))
&& ((this.qualifier == null && other.qualifier == null)
|| (this.qualifier != null && other.qualifier != null && Arrays.equals(this.qualifier, other.qualifier)));
}
@Override
public int hashCode() {
int result = 37;
if (family != null) {
for (byte b : family) {
result += (int) b;
}
}
if (qualifier != null) {
for (byte b : qualifier) {
result += (int) b;
}
}
return result;
}
}

View File

@ -1,57 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.scan;
public class HBaseRegion {
private byte[] startRowKey;
private byte[] endRowKey;
private String regionName;
private long regionId;
private boolean isDegenerate;
public HBaseRegion(final byte[] startRowKey,
final byte[] endRowKey,
final String regionName,
final long regionId,
final boolean isDegenerate) {
this.startRowKey = startRowKey;
this.endRowKey = endRowKey;
this.regionName = regionName;
this.regionId = regionId;
this.isDegenerate = isDegenerate;
}
public byte[] getStartRowKey() {
return startRowKey;
}
public byte[] getEndRowKey() {
return endRowKey;
}
public String getRegionName() {
return regionName;
}
public long getRegionId() {
return regionId;
}
public boolean isDegenerate() {
return isDegenerate;
}
}
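HBaseRegion is the value object returned by listHBaseRegions(String) on the client service interface above. A minimal sketch that lists and prints region boundaries; the table name is an assumption, keys are rendered as UTF-8 only for readability, and the method declares Exception to keep the sketch self-contained (listHBaseRegions itself declares HBaseClientException):

import java.nio.charset.StandardCharsets;
import java.util.List;

import org.apache.nifi.hbase.HBaseClientService;
import org.apache.nifi.hbase.scan.HBaseRegion;

class HBaseRegionSketch {

    static void printRegions(final HBaseClientService client, final String table) throws Exception {
        final List<HBaseRegion> regions = client.listHBaseRegions(table);
        for (final HBaseRegion region : regions) {
            // Region boundaries in HBase are start-inclusive and end-exclusive.
            System.out.printf("%s id=%d degenerate=%s [%s, %s)%n",
                    region.getRegionName(),
                    region.getRegionId(),
                    region.isDegenerate(),
                    new String(region.getStartRowKey(), StandardCharsets.UTF_8),
                    new String(region.getEndRowKey(), StandardCharsets.UTF_8));
        }
    }
}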

View File

@ -1,188 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.scan;
public class ResultCell {
byte[] rowArray;
int rowOffset;
short rowLength;
byte[] familyArray;
int familyOffset;
byte familyLength;
byte[] qualifierArray;
int qualifierOffset;
int qualifierLength;
long timestamp;
byte typeByte;
long sequenceId;
byte[] valueArray;
int valueOffset;
int valueLength;
byte[] tagsArray;
int tagsOffset;
int tagsLength;
public byte[] getRowArray() {
return rowArray;
}
public void setRowArray(byte[] rowArray) {
this.rowArray = rowArray;
}
public int getRowOffset() {
return rowOffset;
}
public void setRowOffset(int rowOffset) {
this.rowOffset = rowOffset;
}
public short getRowLength() {
return rowLength;
}
public void setRowLength(short rowLength) {
this.rowLength = rowLength;
}
public byte[] getFamilyArray() {
return familyArray;
}
public void setFamilyArray(byte[] familyArray) {
this.familyArray = familyArray;
}
public int getFamilyOffset() {
return familyOffset;
}
public void setFamilyOffset(int familyOffset) {
this.familyOffset = familyOffset;
}
public byte getFamilyLength() {
return familyLength;
}
public void setFamilyLength(byte familyLength) {
this.familyLength = familyLength;
}
public byte[] getQualifierArray() {
return qualifierArray;
}
public void setQualifierArray(byte[] qualifierArray) {
this.qualifierArray = qualifierArray;
}
public int getQualifierOffset() {
return qualifierOffset;
}
public void setQualifierOffset(int qualifierOffset) {
this.qualifierOffset = qualifierOffset;
}
public int getQualifierLength() {
return qualifierLength;
}
public void setQualifierLength(int qualifierLength) {
this.qualifierLength = qualifierLength;
}
public long getTimestamp() {
return timestamp;
}
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public byte getTypeByte() {
return typeByte;
}
public void setTypeByte(byte typeByte) {
this.typeByte = typeByte;
}
public long getSequenceId() {
return sequenceId;
}
public void setSequenceId(long sequenceId) {
this.sequenceId = sequenceId;
}
public byte[] getValueArray() {
return valueArray;
}
public void setValueArray(byte[] valueArray) {
this.valueArray = valueArray;
}
public int getValueOffset() {
return valueOffset;
}
public void setValueOffset(int valueOffset) {
this.valueOffset = valueOffset;
}
public int getValueLength() {
return valueLength;
}
public void setValueLength(int valueLength) {
this.valueLength = valueLength;
}
public byte[] getTagsArray() {
return tagsArray;
}
public void setTagsArray(byte[] tagsArray) {
this.tagsArray = tagsArray;
}
public int getTagsOffset() {
return tagsOffset;
}
public void setTagsOffset(int tagsOffset) {
this.tagsOffset = tagsOffset;
}
public int getTagsLength() {
return tagsLength;
}
public void setTagsLength(int tagsLength) {
this.tagsLength = tagsLength;
}
}

View File

@ -1,28 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase.scan;
import java.io.IOException;
/**
* Handles a single row from an HBase scan.
*/
public interface ResultHandler {
void handle(byte[] row, ResultCell[] resultCells) throws IOException;
}
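ResultCell exposes backing arrays with offsets and lengths rather than copied byte arrays, so a ResultHandler typically needs Arrays.copyOfRange to materialize qualifiers and values, just as the lookup service later in this change does. A minimal handler sketch, assuming UTF-8 encoded cells:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;

class PrintingResultHandler implements ResultHandler {

    @Override
    public void handle(final byte[] row, final ResultCell[] resultCells) {
        for (final ResultCell cell : resultCells) {
            final byte[] qualifier = Arrays.copyOfRange(cell.getQualifierArray(),
                    cell.getQualifierOffset(), cell.getQualifierOffset() + cell.getQualifierLength());
            final byte[] value = Arrays.copyOfRange(cell.getValueArray(),
                    cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
            System.out.println(new String(row, StandardCharsets.UTF_8)
                    + " " + new String(qualifier, StandardCharsets.UTF_8)
                    + "=" + new String(value, StandardCharsets.UTF_8));
        }
    }
}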

View File

@ -1,41 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase_2-client-service-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-hbase_2-client-service-nar</artifactId>
<packaging>nar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-shared-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase_2-client-service</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
</dependencies>
</project>

View File

@ -1,357 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
APACHE NIFI SUBCOMPONENTS:
The Apache NiFi project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for the these
subcomponents is subject to the terms and conditions of the following
licenses.
The binary distribution of this product bundles 'Jcodings' under an MIT style
license.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
The binary distribution of this product bundles 'Joni' under an MIT style
license.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
The binary distribution of this product bundles 'Google Protocol Buffers Java 2.5.0'
which is licensed under a BSD license.
This license applies to all parts of Protocol Buffers except the following:
- Atomicops support for generic gcc, located in
src/google/protobuf/stubs/atomicops_internals_generic_gcc.h.
This file is copyrighted by Red Hat Inc.
- Atomicops support for AIX/POWER, located in
src/google/protobuf/stubs/atomicops_internals_aix.h.
This file is copyrighted by Bloomberg Finance LP.
Copyright 2014, Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Code generated by the Protocol Buffer compiler is owned by the owner
of the input file used when generating it. This code is not
standalone and requires a support library to be linked with it. This
support library is itself covered by the above license.
The binary distribution of this product bundles 'Paranamer Core' which is available
under a BSD style license.
Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
The binary distribution of this product bundles 'JCraft Jsch' which is available
under a BSD style license.
Copyright (c) 2002-2014 Atsuhiko Yamanaka, JCraft,Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,
INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,322 +0,0 @@
nifi-hbase_2-client-service-nar
Copyright 2014-2024 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
******************
Apache Software License v2
******************
(ASLv2) Apache Commons CLI
The following NOTICE information applies:
Apache Commons CLI
Copyright 2001-2009 The Apache Software Foundation
(ASLv2) Apache Curator
The following NOTICE information applies:
Curator Framework
Copyright 2011-2014 The Apache Software Foundation
Curator Client
Copyright 2011-2014 The Apache Software Foundation
Curator Recipes
Copyright 2011-2014 The Apache Software Foundation
(ASLv2) Apache Directory Server
The following NOTICE information applies:
ApacheDS Protocol Kerberos Codec
Copyright 2003-2013 The Apache Software Foundation
ApacheDS I18n
Copyright 2003-2013 The Apache Software Foundation
Apache Directory API ASN.1 API
Copyright 2003-2013 The Apache Software Foundation
Apache Directory LDAP API Utilities
Copyright 2003-2013 The Apache Software Foundation
(ASLv2) Apache Commons Math
The following NOTICE information applies:
Apache Commons Math
Copyright 2001-2012 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
===============================================================================
The BracketFinder (package org.apache.commons.math3.optimization.univariate)
and PowellOptimizer (package org.apache.commons.math3.optimization.general)
classes are based on the Python code in module "optimize.py" (version 0.5)
developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/)
Copyright © 2003-2009 SciPy Developers.
===============================================================================
The LinearConstraint, LinearObjectiveFunction, LinearOptimizer,
RelationShip, SimplexSolver and SimplexTableau classes in package
org.apache.commons.math3.optimization.linear include software developed by
Benjamin McCann (http://www.benmccann.com) and distributed with
the following copyright: Copyright 2009 Google Inc.
===============================================================================
This product includes software developed by the
University of Chicago, as Operator of Argonne National
Laboratory.
The LevenbergMarquardtOptimizer class in package
org.apache.commons.math3.optimization.general includes software
translated from the lmder, lmpar and qrsolv Fortran routines
from the Minpack package
Minpack Copyright Notice (1999) University of Chicago. All rights reserved
===============================================================================
The GraggBulirschStoerIntegrator class in package
org.apache.commons.math3.ode.nonstiff includes software translated
from the odex Fortran routine developed by E. Hairer and G. Wanner.
Original source copyright:
Copyright (c) 2004, Ernst Hairer
===============================================================================
The EigenDecompositionImpl class in package
org.apache.commons.math3.linear includes software translated
from some LAPACK Fortran routines. Original source copyright:
Copyright (c) 1992-2008 The University of Tennessee. All rights reserved.
===============================================================================
The MersenneTwister class in package org.apache.commons.math3.random
includes software translated from the 2002-01-26 version of
the Mersenne-Twister generator written in C by Makoto Matsumoto and Takuji
Nishimura. Original source copyright:
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved
===============================================================================
The LocalizedFormatsTest class in the unit tests is an adapted version of
the OrekitMessagesTest class from the orekit library distributed under the
terms of the Apache 2 licence. Original source copyright:
Copyright 2010 CS Systèmes d'Information
===============================================================================
The HermiteInterpolator class and its corresponding test have been imported from
the orekit library distributed under the terms of the Apache 2 licence. Original
source copyright:
Copyright 2010-2012 CS Systèmes d'Information
===============================================================================
The creation of the package "o.a.c.m.analysis.integration.gauss" was inspired
by an original code donated by Sébastien Brisard.
===============================================================================
(ASLv2) Apache Jakarta HttpClient
The following NOTICE information applies:
Apache Jakarta HttpClient
Copyright 1999-2007 The Apache Software Foundation
(ASLv2) Apache Commons Codec
The following NOTICE information applies:
Apache Commons Codec
Copyright 2002-2014 The Apache Software Foundation
src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
contains test data from http://aspell.net/test/orig/batch0.tab.
Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
===============================================================================
The content of package org.apache.commons.codec.language.bm has been translated
from the original php source code available at http://stevemorse.org/phoneticinfo.htm
with permission from the original authors.
Original source copyright:
Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
(ASLv2) Apache Commons IO
The following NOTICE information applies:
Apache Commons IO
Copyright 2002-2016 The Apache Software Foundation
(ASLv2) Apache Commons Net
The following NOTICE information applies:
Apache Commons Net
Copyright 2001-2013 The Apache Software Foundation
(ASLv2) Apache Commons Collections
The following NOTICE information applies:
Apache Commons Collections
Copyright 2001-2016 The Apache Software Foundation
(ASLv2) Apache Commons Crypto
The following NOTICE information applies:
Apache Commons Crypto
Copyright 2016-2016 The Apache Software Foundation
(ASLv2) Jettison
The following NOTICE information applies:
Copyright 2006 Envoi Solutions LLC
(ASLv2) Apache Commons Lang
The following NOTICE information applies:
Apache Commons Lang
Copyright 2001-2011 The Apache Software Foundation
(ASLv2) Apache log4j
The following NOTICE information applies:
Apache log4j
Copyright 2007 The Apache Software Foundation
(ASLv2) Apache HttpComponents
The following NOTICE information applies:
Apache HttpClient
Copyright 1999-2015 The Apache Software Foundation
Apache HttpComponents HttpCore
Copyright 2005-2011 The Apache Software Foundation
(ASLv2) Apache Commons Configuration
The following NOTICE information applies:
Apache Commons Configuration
Copyright 2001-2008 The Apache Software Foundation
(ASLv2) Apache Jakarta Commons Digester
The following NOTICE information applies:
Apache Jakarta Commons Digester
Copyright 2001-2006 The Apache Software Foundation
(ASLv2) Apache Commons BeanUtils
The following NOTICE information applies:
Apache Commons BeanUtils
Copyright 2000-2008 The Apache Software Foundation
(ASLv2) Apache Avro
The following NOTICE information applies:
Apache Avro
Copyright 2009-2017 The Apache Software Foundation
(ASLv2) Snappy Java
The following NOTICE information applies:
This product includes software developed by Google
Snappy: http://code.google.com/p/snappy/ (New BSD License)
This product includes software developed by Apache
PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/
(Apache 2.0 license)
This library containd statically linked libstdc++. This inclusion is allowed by
"GCC RUntime Library Exception"
http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html
(ASLv2) ApacheDS
The following NOTICE information applies:
ApacheDS
Copyright 2003-2013 The Apache Software Foundation
(ASLv2) Apache ZooKeeper
The following NOTICE information applies:
Apache ZooKeeper
Copyright 2009-2012 The Apache Software Foundation
(ASLv2) Apache Commons Compress
The following NOTICE information applies:
Apache Commons Compress
Copyright 2002-2017 The Apache Software Foundation
The files in the package org.apache.commons.compress.archivers.sevenz
were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/),
which has been placed in the public domain:
"LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html)
(ASLv2) Apache Commons Daemon
The following NOTICE information applies:
Apache Commons Daemon
Copyright 1999-2013 The Apache Software Foundation
(ASLv2) The Netty Project
The following NOTICE information applies:
The Netty Project
Copyright 2011 The Netty Project
(ASLv2) Apache Xerces Java
The following NOTICE information applies:
Apache Xerces Java
Copyright 1999-2007 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Portions of this software were originally based on the following:
- software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
- software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
- voluntary contributions made by Paul Eng on behalf of the
Apache Software Foundation that were originally developed at iClick, Inc.,
software copyright (c) 1999.
(ASLv2) Google Guice
The following NOTICE information applies:
Google Guice - Core Library
Copyright 2006-2011 Google, Inc.
Google Guice - Extensions - Servlet
Copyright 2006-2011 Google, Inc.
(ASLv2) HBase Common
The following NOTICE information applies:
This product includes portions of the Guava project v14 and v21, specifically
'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java'
'hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java'
'hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java'
Copyright (C) 2007 The Guava Authors
Licensed under the Apache License, Version 2.0
(ASLv2) Apache HTrace Core
The following NOTICE information applies:
Copyright 2016 The Apache Software Foundation
Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
is a distributed tracing system that is Apache 2.0 Licensed.
Copyright 2012 Twitter, Inc.
(ASLv2) Jackson Core ASL
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Jackson Mapper ASL
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Audience Annotations
The following NOTICE information applies:
Apache Yetus
Copyright 2008-2018 The Apache Software Foundation
(ASLv2) Jetty
The following NOTICE information applies:
Jetty Web Container
Copyright 1995-2019 Mort Bay Consulting Pty Ltd.
(ASLv2) Dropwizard Metrics
Copyright 2010-2013 Coda Hale and Yammer, Inc.
This product includes software developed by Coda Hale and Yammer, Inc.
This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
LongAdder), which was released with the following comments:
Written by Doug Lea with assistance from members of JCP JSR-166
Expert Group and released to the public domain, as explained at
http://creativecommons.org/publicdomain/zero/1.0/

View File

@ -1,154 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase_2-client-service-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-hbase_2-client-service</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-client-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-lookup-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hadoop-utils</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-security-kerberos</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-security-kerberos-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-distributed-cache-client-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kerberos-user-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<!-- Excluded for Hadoop 3.4.0 -->
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk15on</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk18on</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.reload4j</groupId>
<artifactId>reload4j</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core4</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>com.github.stephenc.findbugs</groupId>
<artifactId>findbugs-annotations</artifactId>
<version>1.3.9-1</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -1,157 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.lifecycle.OnDisabled;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.reporting.InitializationException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.nifi.hbase.VisibilityLabelUtils.AUTHORIZATIONS;
public abstract class AbstractHBaseLookupService extends AbstractControllerService {
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("hbase-client-service")
.displayName("HBase Client Service")
.description("Specifies the HBase Client Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
.name("hb-lu-table-name")
.displayName("Table Name")
.description("The name of the table where look ups will be run.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final PropertyDescriptor RETURN_COLUMNS = new PropertyDescriptor.Builder()
.name("hb-lu-return-cols")
.displayName("Columns")
.description("A comma-separated list of \\\"<colFamily>:<colQualifier>\\\" pairs to return when scanning. " +
"To return all columns for a given family, leave off the qualifier such as \\\"<colFamily1>,<colFamily2>\\\".")
.required(false)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
.name("hb-lu-charset")
.displayName("Character Set")
.description("Specifies the character set used to decode bytes retrieved from HBase.")
.required(true)
.defaultValue("UTF-8")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
static final String ROW_KEY_KEY = "rowKey";
protected static final Set<String> REQUIRED_KEYS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(ROW_KEY_KEY)));
static final List<PropertyDescriptor> PROPERTIES;
static {
final List<PropertyDescriptor> props = new ArrayList<>();
props.add(HBASE_CLIENT_SERVICE);
props.add(TABLE_NAME);
props.add(AUTHORIZATIONS);
props.add(RETURN_COLUMNS);
props.add(CHARSET);
PROPERTIES = Collections.unmodifiableList(props);
}
protected String tableName;
protected List<Column> columns;
protected Charset charset;
protected HBaseClientService hBaseClientService;
protected List<String> authorizations;
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return PROPERTIES;
}
@OnEnabled
public void onEnabled(final ConfigurationContext context) throws InitializationException, IOException, InterruptedException {
this.hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
this.tableName = context.getProperty(TABLE_NAME).getValue();
this.columns = getColumns(context.getProperty(RETURN_COLUMNS).getValue());
this.charset = Charset.forName(context.getProperty(CHARSET).getValue());
this.authorizations = VisibilityLabelUtils.getAuthorizations(context);
}
@OnDisabled
public void onDisabled() {
this.hBaseClientService = null;
this.tableName = null;
this.columns = null;
this.charset = null;
}
protected List<Column> getColumns(final String columnsValue) {
final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
final List<Column> columnsList = new ArrayList<>();
for (final String column : columns) {
if (column.contains(":")) {
final String[] parts = column.trim().split(":");
final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, cq));
} else {
final byte[] cf = column.trim().getBytes(StandardCharsets.UTF_8);
columnsList.add(new Column(cf, null));
}
}
return columnsList;
}
protected Map<String, Object> scan(byte[] rowKeyBytes) throws IOException {
final Map<String, Object> values = new HashMap<>();
hBaseClientService.scan(tableName, rowKeyBytes, rowKeyBytes, columns, authorizations, (byte[] row, ResultCell[] resultCells) -> {
for (final ResultCell cell : resultCells) {
final byte[] qualifier = Arrays.copyOfRange(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierOffset() + cell.getQualifierLength());
final byte[] value = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
values.put(new String(qualifier, charset), new String(value, charset));
}
});
return values;
}
}
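For orientation, here is a small standalone sketch (not taken from the NiFi codebase) of the Columns property convention that getColumns parses above: a "family:qualifier" pair selects a single qualifier, while a bare family name selects every qualifier in that family. The property value used is hypothetical.

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

class ReturnColumnsFormatSketch {
    // Parses the same "<colFamily>:<colQualifier>" / "<colFamily>" convention as getColumns(...)
    static List<byte[][]> parse(final String columnsValue) {
        final List<byte[][]> columns = new ArrayList<>();
        for (final String column : columnsValue.split(",")) {
            final String[] parts = column.trim().split(":");
            final byte[] family = parts[0].getBytes(StandardCharsets.UTF_8);
            final byte[] qualifier = parts.length > 1 ? parts[1].getBytes(StandardCharsets.UTF_8) : null;
            columns.add(new byte[][] {family, qualifier});
        }
        return columns;
    }

    public static void main(String[] args) {
        // Hypothetical property value: two qualified columns plus one whole column family
        parse("meta:created,meta:updated,payload").forEach(column ->
                System.out.println(new String(column[0], StandardCharsets.UTF_8)
                        + (column[1] == null ? " -> all qualifiers in the family" : ":" + new String(column[1], StandardCharsets.UTF_8))));
    }
}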

View File

@ -1,312 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.distributed.cache.client.AtomicCacheEntry;
import org.apache.nifi.distributed.cache.client.AtomicDistributedMapCacheClient;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.reporting.InitializationException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static org.apache.nifi.hbase.VisibilityLabelUtils.AUTHORIZATIONS;
@Tags({"distributed", "cache", "state", "map", "cluster", "hbase"})
@SeeAlso(classNames = {"org.apache.nifi.hbase.HBase_2_ClientService"})
@CapabilityDescription("Provides the ability to use an HBase table as a cache, in place of a DistributedMapCache."
+ " Uses a HBase_2_ClientService controller to communicate with HBase.")
public class HBase_2_ClientMapCacheService extends AbstractControllerService implements AtomicDistributedMapCacheClient<byte[]> {
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("Specifies the HBase Client Controller Service to use for accessing HBase.")
.required(true)
.identifiesControllerService(HBaseClientService.class)
.build();
public static final PropertyDescriptor HBASE_CACHE_TABLE_NAME = new PropertyDescriptor.Builder()
.name("HBase Cache Table Name")
.description("Name of the table on HBase to use for the cache.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
public static final PropertyDescriptor HBASE_COLUMN_FAMILY = new PropertyDescriptor.Builder()
.name("HBase Column Family")
.description("Name of the column family on HBase to use for the cache.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.defaultValue("f")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
public static final PropertyDescriptor HBASE_COLUMN_QUALIFIER = new PropertyDescriptor.Builder()
.name("HBase Column Qualifier")
.description("Name of the column qualifier on HBase to use for the cache")
.defaultValue("q")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
public static final PropertyDescriptor VISIBILITY_EXPRESSION = new PropertyDescriptor.Builder()
.name("hbase-cache-visibility-expression")
.displayName("Visibility Expression")
.description("The default visibility expression to apply to cells when visibility expression support is enabled.")
.defaultValue("")
.addValidator(Validator.VALID)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.required(false)
.build();
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> descriptors = new ArrayList<>();
descriptors.add(HBASE_CACHE_TABLE_NAME);
descriptors.add(AUTHORIZATIONS);
descriptors.add(VISIBILITY_EXPRESSION);
descriptors.add(HBASE_CLIENT_SERVICE);
descriptors.add(HBASE_COLUMN_FAMILY);
descriptors.add(HBASE_COLUMN_QUALIFIER);
return descriptors;
}
// Other threads may call @OnEnabled so these are marked volatile to ensure other class methods read the updated value
private volatile String hBaseCacheTableName;
private volatile HBaseClientService hBaseClientService;
private volatile String hBaseColumnFamily;
private volatile byte[] hBaseColumnFamilyBytes;
private volatile String hBaseColumnQualifier;
private volatile byte[] hBaseColumnQualifierBytes;
private List<String> authorizations;
private String defaultVisibilityExpression;
@OnEnabled
public void onConfigured(final ConfigurationContext context) throws InitializationException {
hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
hBaseCacheTableName = context.getProperty(HBASE_CACHE_TABLE_NAME).evaluateAttributeExpressions().getValue();
hBaseColumnFamily = context.getProperty(HBASE_COLUMN_FAMILY).evaluateAttributeExpressions().getValue();
hBaseColumnQualifier = context.getProperty(HBASE_COLUMN_QUALIFIER).evaluateAttributeExpressions().getValue();
hBaseColumnFamilyBytes = hBaseColumnFamily.getBytes(StandardCharsets.UTF_8);
hBaseColumnQualifierBytes = hBaseColumnQualifier.getBytes(StandardCharsets.UTF_8);
authorizations = VisibilityLabelUtils.getAuthorizations(context);
if (context.getProperty(VISIBILITY_EXPRESSION).isSet()) {
defaultVisibilityExpression = context.getProperty(VISIBILITY_EXPRESSION).evaluateAttributeExpressions().getValue();
} else {
defaultVisibilityExpression = null;
}
}
private <T> byte[] serialize(final T value, final Serializer<T> serializer) throws IOException {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(value, baos);
return baos.toByteArray();
}
private <T> T deserialize(final byte[] value, final Deserializer<T> deserializer) throws IOException {
return deserializer.deserialize(value);
}
@Override
public <K, V> boolean putIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException {
final byte[] rowIdBytes = serialize(key, keySerializer);
final byte[] valueBytes = serialize(value, valueSerializer);
final PutColumn putColumn = new PutColumn(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, valueBytes, defaultVisibilityExpression);
return hBaseClientService.checkAndPut(hBaseCacheTableName, rowIdBytes, hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, null, putColumn);
}
@Override
public <K, V> void put(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) throws IOException {
List<PutColumn> putColumns = new ArrayList<PutColumn>(1);
final byte[] rowIdBytes = serialize(key, keySerializer);
final byte[] valueBytes = serialize(value, valueSerializer);
final PutColumn putColumn = new PutColumn(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, valueBytes, defaultVisibilityExpression);
putColumns.add(putColumn);
hBaseClientService.put(hBaseCacheTableName, rowIdBytes, putColumns);
}
@Override
public <K, V> void putAll(Map<K, V> keysAndValues, Serializer<K> keySerializer, Serializer<V> valueSerializer) throws IOException {
List<PutFlowFile> puts = new ArrayList<>();
for (Map.Entry<K, V> entry : keysAndValues.entrySet()) {
List<PutColumn> putColumns = new ArrayList<PutColumn>(1);
final byte[] rowIdBytes = serialize(entry.getKey(), keySerializer);
final byte[] valueBytes = serialize(entry.getValue(), valueSerializer);
final PutColumn putColumn = new PutColumn(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, valueBytes, defaultVisibilityExpression);
putColumns.add(putColumn);
puts.add(new PutFlowFile(hBaseCacheTableName, rowIdBytes, putColumns, null));
}
hBaseClientService.put(hBaseCacheTableName, puts);
}
@Override
public <K> boolean containsKey(final K key, final Serializer<K> keySerializer) throws IOException {
final byte[] rowIdBytes = serialize(key, keySerializer);
final HBaseRowHandler handler = new HBaseRowHandler();
final List<Column> columnsList = new ArrayList<Column>(0);
columnsList.add(new Column(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes));
hBaseClientService.scan(hBaseCacheTableName, rowIdBytes, rowIdBytes, columnsList, authorizations, handler);
return (handler.numRows() > 0);
}
/**
* Note that the implementation of getAndPutIfAbsent is not atomic.
* The putIfAbsent is atomic, but a getAndPutIfAbsent does a get and then a putIfAbsent.
* If there is an existing value and it is updated in between the two steps, then the existing (unmodified) value will be returned.
* If the existing value was deleted between the two steps, getAndPutIfAbsent will correctly return null.
* This should not generally be an issue with cache processors such as DetectDuplicate.
*
*/
@Override
public <K, V> V getAndPutIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer, final Deserializer<V> valueDeserializer) throws IOException {
// Between the get and the putIfAbsent, the value could be deleted or updated.
// Logic below takes care of the deleted case but not the updated case.
// This is probably fine since DistributedMapCache and DetectDuplicate expect to receive the original cache value
// Could possibly be fixed by implementing AtomicDistributedMapCache (Map Cache protocol version 2)
final V got = get(key, keySerializer, valueDeserializer);
final boolean wasAbsent = putIfAbsent(key, value, keySerializer, valueSerializer);
if (!wasAbsent) return got;
else return null;
}
@Override
public <K, V> V get(final K key, final Serializer<K> keySerializer, final Deserializer<V> valueDeserializer) throws IOException {
final byte[] rowIdBytes = serialize(key, keySerializer);
final HBaseRowHandler handler = new HBaseRowHandler();
final List<Column> columnsList = new ArrayList<Column>(0);
columnsList.add(new Column(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes));
hBaseClientService.scan(hBaseCacheTableName, rowIdBytes, rowIdBytes, columnsList, authorizations, handler);
if (handler.numRows() > 1) {
throw new IOException("Found multiple rows in HBase for key");
} else if (handler.numRows() == 1) {
return deserialize( handler.getLastResultBytes(), valueDeserializer);
} else {
return null;
}
}
@Override
public <K> boolean remove(final K key, final Serializer<K> keySerializer) throws IOException {
final boolean contains = containsKey(key, keySerializer);
if (contains) {
final byte[] rowIdBytes = serialize(key, keySerializer);
final DeleteRequest deleteRequest = new DeleteRequest(rowIdBytes, hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, null);
hBaseClientService.deleteCells(hBaseCacheTableName, Collections.singletonList(deleteRequest));
}
return contains;
}
@Override
public void close() throws IOException {
}
@Override
protected void finalize() throws Throwable {
}
@Override
public <K, V> AtomicCacheEntry<K, V, byte[]> fetch(K key, Serializer<K> keySerializer, Deserializer<V> valueDeserializer) throws IOException {
final byte[] rowIdBytes = serialize(key, keySerializer);
final HBaseRowHandler handler = new HBaseRowHandler();
final List<Column> columnsList = new ArrayList<>(1);
columnsList.add(new Column(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes));
hBaseClientService.scan(hBaseCacheTableName, rowIdBytes, rowIdBytes, columnsList, authorizations, handler);
if (handler.numRows() > 1) {
throw new IOException("Found multiple rows in HBase for key");
} else if (handler.numRows() == 1) {
return new AtomicCacheEntry<>(key, deserialize(handler.getLastResultBytes(), valueDeserializer), handler.getLastResultBytes());
} else {
return null;
}
}
@Override
public <K, V> boolean replace(AtomicCacheEntry<K, V, byte[]> entry, Serializer<K> keySerializer, Serializer<V> valueSerializer) throws IOException {
final byte[] rowIdBytes = serialize(entry.getKey(), keySerializer);
final byte[] valueBytes = serialize(entry.getValue(), valueSerializer);
final byte[] revision = entry.getRevision().orElse(null);
final PutColumn putColumn = new PutColumn(hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, valueBytes, defaultVisibilityExpression);
// If the current revision is unset then only insert the row if it doesn't already exist.
return hBaseClientService.checkAndPut(hBaseCacheTableName, rowIdBytes, hBaseColumnFamilyBytes, hBaseColumnQualifierBytes, revision, putColumn);
}
private class HBaseRowHandler implements ResultHandler {
private int numRows = 0;
private byte[] lastResultBytes;
@Override
public void handle(byte[] row, ResultCell[] resultCells) {
numRows += 1;
for (final ResultCell resultCell : resultCells) {
lastResultBytes = Arrays.copyOfRange(resultCell.getValueArray(), resultCell.getValueOffset(), resultCell.getValueLength() + resultCell.getValueOffset());
}
}
public int numRows() {
return numRows;
}
public byte[] getLastResultBytes() {
return lastResultBytes;
}
}
}
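A hedged usage sketch of the map cache above, assuming an already-enabled service reference named cacheService and plain UTF-8 String serializers; the keys and values are illustrative only. It shows that putIfAbsent rides on checkAndPut with a null expected value, and that fetch/replace give optimistic concurrency keyed on the raw cell bytes returned as the revision.

import org.apache.nifi.distributed.cache.client.AtomicCacheEntry;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.hbase.HBase_2_ClientMapCacheService;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

class MapCacheUsageSketch {
    static void example(final HBase_2_ClientMapCacheService cacheService) throws IOException {
        final Serializer<String> stringSerializer = (value, out) -> out.write(value.getBytes(StandardCharsets.UTF_8));
        final Deserializer<String> stringDeserializer = bytes -> bytes == null ? null : new String(bytes, StandardCharsets.UTF_8);

        // Succeeds only when the row key is not present yet (checkAndPut with a null expected value)
        final boolean added = cacheService.putIfAbsent("flowfile-1234", "seen", stringSerializer, stringSerializer);

        // get and containsKey are implemented as single-row scans over the configured column
        final String current = cacheService.get("flowfile-1234", stringSerializer, stringDeserializer);

        // Optimistic concurrency: replace succeeds only while the cell still holds the fetched revision bytes
        final AtomicCacheEntry<String, String, byte[]> entry = cacheService.fetch("flowfile-1234", stringSerializer, stringDeserializer);
        if (entry != null) {
            cacheService.replace(new AtomicCacheEntry<String, String, byte[]>(entry.getKey(), "seen-again", entry.getRevision().orElse(null)),
                    stringSerializer, stringSerializer);
        }
    }
}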

View File

@ -1,885 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.CheckAndMutate;
import org.apache.hadoop.hbase.client.CheckAndMutateResult;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.ParseFilter;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
import org.apache.nifi.annotation.behavior.Restricted;
import org.apache.nifi.annotation.behavior.Restriction;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnDisabled;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.RequiredPermission;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.resource.ResourceCardinality;
import org.apache.nifi.components.resource.ResourceType;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.ControllerServiceInitializationContext;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.hadoop.SecurityUtil;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.HBaseRegion;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.kerberos.KerberosUserService;
import org.apache.nifi.migration.PropertyConfiguration;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.security.krb.KerberosUser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@RequiresInstanceClassLoading
@Tags({ "hbase", "client"})
@CapabilityDescription("Implementation of HBaseClientService using the HBase 2.1.1 client. This service can be configured " +
"by providing a comma-separated list of configuration files, or by specifying values for the other properties. If configuration files " +
"are provided, they will be loaded first, and the values of the additional properties will override the values from " +
"the configuration files. In addition, any user defined properties on the processor will also be passed to the HBase " +
"configuration.")
@DynamicProperty(name = "The name of an HBase configuration property.", value = "The value of the given HBase configuration property.",
description = "These properties will be set on the HBase configuration after loading any provided configuration files.")
@Restricted(
restrictions = {
@Restriction(
requiredPermission = RequiredPermission.REFERENCE_REMOTE_RESOURCES,
explanation = "Client JAR Location can reference resources over HTTP"
)
}
)
public class HBase_2_ClientService extends AbstractControllerService implements HBaseClientService {
private static final Logger logger = LoggerFactory.getLogger(HBase_2_ClientService.class);
static final PropertyDescriptor KERBEROS_USER_SERVICE = new PropertyDescriptor.Builder()
.name("kerberos-user-service")
.displayName("Kerberos User Service")
.description("Specifies the Kerberos User Controller Service that should be used for authenticating with Kerberos")
.identifiesControllerService(KerberosUserService.class)
.required(false)
.build();
static final PropertyDescriptor HADOOP_CONF_FILES = new PropertyDescriptor.Builder()
.name("Hadoop Configuration Files")
.description("Comma-separated list of Hadoop Configuration files," +
" such as hbase-site.xml and core-site.xml for kerberos, " +
"including full paths to the files.")
.identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE, ResourceType.DIRECTORY)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.build();
static final PropertyDescriptor ZOOKEEPER_QUORUM = new PropertyDescriptor.Builder()
.name("ZooKeeper Quorum")
.description("Comma-separated list of ZooKeeper hosts for HBase. Required if Hadoop Configuration Files are not provided.")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.build();
static final PropertyDescriptor ZOOKEEPER_CLIENT_PORT = new PropertyDescriptor.Builder()
.name("ZooKeeper Client Port")
.description("The port on which ZooKeeper is accepting client connections. Required if Hadoop Configuration Files are not provided.")
.addValidator(StandardValidators.PORT_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.build();
static final PropertyDescriptor ZOOKEEPER_ZNODE_PARENT = new PropertyDescriptor.Builder()
.name("ZooKeeper ZNode Parent")
.description("The ZooKeeper ZNode Parent value for HBase (example: /hbase). Required if Hadoop Configuration Files are not provided.")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.build();
static final PropertyDescriptor HBASE_CLIENT_RETRIES = new PropertyDescriptor.Builder()
.name("HBase Client Retries")
.description("The number of times the HBase client will retry connecting. Required if Hadoop Configuration Files are not provided.")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.defaultValue("1")
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.build();
// This property is never referenced directly but is necessary so that the classpath will be dynamically modified.
static final PropertyDescriptor PHOENIX_CLIENT_JAR_LOCATION = new PropertyDescriptor.Builder()
.name("Phoenix Client JAR Location")
.description("The full path to the Phoenix client JAR. Required if Phoenix is installed on top of HBase.")
.identifiesExternalResource(ResourceCardinality.SINGLE, ResourceType.FILE, ResourceType.DIRECTORY, ResourceType.URL)
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
.dynamicallyModifiesClasspath(true)
.build();
static final String HBASE_CONF_ZK_QUORUM = "hbase.zookeeper.quorum";
static final String HBASE_CONF_ZK_PORT = "hbase.zookeeper.property.clientPort";
static final String HBASE_CONF_ZNODE_PARENT = "zookeeper.znode.parent";
static final String HBASE_CONF_CLIENT_RETRIES = "hbase.client.retries.number";
private volatile Connection connection;
private volatile UserGroupInformation ugi;
private final AtomicReference<KerberosUser> kerberosUserReference = new AtomicReference<>();
private volatile String masterAddress;
private List<PropertyDescriptor> properties;
// Holder of cached Configuration information so validation does not reload the same config over and over
private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();
@Override
protected void init(ControllerServiceInitializationContext config) {
List<PropertyDescriptor> props = new ArrayList<>();
props.add(HADOOP_CONF_FILES);
props.add(KERBEROS_USER_SERVICE);
props.add(ZOOKEEPER_QUORUM);
props.add(ZOOKEEPER_CLIENT_PORT);
props.add(ZOOKEEPER_ZNODE_PARENT);
props.add(HBASE_CLIENT_RETRIES);
props.add(PHOENIX_CLIENT_JAR_LOCATION);
props.addAll(getAdditionalProperties());
this.properties = Collections.unmodifiableList(props);
}
@Override
public void migrateProperties(final PropertyConfiguration config) {
config.removeProperty("Kerberos Principal");
config.removeProperty("Kerberos Password");
config.removeProperty("Kerberos Keytab");
config.removeProperty("kerberos-credentials-service");
}
protected List<PropertyDescriptor> getAdditionalProperties() {
return new ArrayList<>();
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return properties;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
return new PropertyDescriptor.Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' in the HBase configuration.")
.name(propertyDescriptorName)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.dynamic(true)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
boolean confFileProvided = validationContext.getProperty(HADOOP_CONF_FILES).isSet();
boolean zkQuorumProvided = validationContext.getProperty(ZOOKEEPER_QUORUM).isSet();
boolean zkPortProvided = validationContext.getProperty(ZOOKEEPER_CLIENT_PORT).isSet();
boolean znodeParentProvided = validationContext.getProperty(ZOOKEEPER_ZNODE_PARENT).isSet();
boolean retriesProvided = validationContext.getProperty(HBASE_CLIENT_RETRIES).isSet();
final List<ValidationResult> problems = new ArrayList<>();
if (!confFileProvided && (!zkQuorumProvided || !zkPortProvided || !znodeParentProvided || !retriesProvided)) {
problems.add(new ValidationResult.Builder()
.valid(false)
.subject(this.getClass().getSimpleName())
.explanation("ZooKeeper Quorum, ZooKeeper Client Port, ZooKeeper ZNode Parent, and HBase Client Retries are required " +
"when Hadoop Configuration Files are not provided.")
.build());
}
if (confFileProvided) {
final String configFiles = validationContext.getProperty(HADOOP_CONF_FILES).evaluateAttributeExpressions().getValue();
ValidationResources resources = validationResourceHolder.get();
// if no resources in the holder, or if the holder has different resources loaded,
// then load the Configuration and set the new resources in the holder
if (resources == null || !configFiles.equals(resources.getConfigResources())) {
getLogger().debug("Reloading validation resources");
resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
validationResourceHolder.set(resources);
}
}
return problems;
}
/**
* As of Apache NiFi 1.5.0, due to changes made to
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
* class to authenticate a principal with Kerberos, HBase controller services no longer
* attempt relogins explicitly. For more information, please read the documentation for
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
*
* @see SecurityUtil#loginKerberos(Configuration, String, String)
*/
@OnEnabled
public void onEnabled(final ConfigurationContext context) throws IOException, InterruptedException {
this.connection = createConnection(context);
// connection check
if (this.connection != null) {
final Admin admin = this.connection.getAdmin();
if (admin != null) {
admin.listTableNames();
final ClusterMetrics metrics = admin.getClusterMetrics();
if (metrics != null) {
final ServerName master = metrics.getMasterName();
masterAddress = master == null ? null : master.getAddress().toString();
}
}
}
}
protected Connection createConnection(final ConfigurationContext context) throws IOException, InterruptedException {
final String configFiles = context.getProperty(HADOOP_CONF_FILES).evaluateAttributeExpressions().getValue();
final Configuration hbaseConfig = getConfigurationFromFiles(configFiles);
// override with any properties that are provided
if (context.getProperty(ZOOKEEPER_QUORUM).isSet()) {
hbaseConfig.set(HBASE_CONF_ZK_QUORUM, context.getProperty(ZOOKEEPER_QUORUM).evaluateAttributeExpressions().getValue());
}
if (context.getProperty(ZOOKEEPER_CLIENT_PORT).isSet()) {
hbaseConfig.set(HBASE_CONF_ZK_PORT, context.getProperty(ZOOKEEPER_CLIENT_PORT).evaluateAttributeExpressions().getValue());
}
if (context.getProperty(ZOOKEEPER_ZNODE_PARENT).isSet()) {
hbaseConfig.set(HBASE_CONF_ZNODE_PARENT, context.getProperty(ZOOKEEPER_ZNODE_PARENT).evaluateAttributeExpressions().getValue());
}
if (context.getProperty(HBASE_CLIENT_RETRIES).isSet()) {
hbaseConfig.set(HBASE_CONF_CLIENT_RETRIES, context.getProperty(HBASE_CLIENT_RETRIES).evaluateAttributeExpressions().getValue());
}
// add any dynamic properties to the HBase configuration
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
final PropertyDescriptor descriptor = entry.getKey();
if (descriptor.isDynamic()) {
hbaseConfig.set(descriptor.getName(), entry.getValue());
}
}
if (SecurityUtil.isSecurityEnabled(hbaseConfig)) {
getLogger().debug("HBase Security Enabled, creating KerberosUser");
final KerberosUser kerberosUser = createKerberosUser(context);
ugi = SecurityUtil.getUgiForKerberosUser(hbaseConfig, kerberosUser);
kerberosUserReference.set(kerberosUser);
getLogger().info("Successfully logged in as principal {}", kerberosUser.getPrincipal());
return getUgi().doAs((PrivilegedExceptionAction<Connection>) () -> ConnectionFactory.createConnection(hbaseConfig));
} else {
getLogger().debug("Simple Authentication");
return ConnectionFactory.createConnection(hbaseConfig);
}
}
protected KerberosUser createKerberosUser(final ConfigurationContext context) {
// If a Kerberos User Service is configured, obtain the KerberosUser from it.
// Kerberos authentication is only supported through the Kerberos User Service; the legacy principal, keytab, and password properties were removed by migrateProperties.
final KerberosUserService kerberosUserService = context.getProperty(KERBEROS_USER_SERVICE).asControllerService(KerberosUserService.class);
if (kerberosUserService != null) {
return kerberosUserService.createKerberosUser();
} else {
throw new IllegalStateException("Unable to authenticate with Kerberos because no Kerberos User Service was configured");
}
}
protected Configuration getConfigurationFromFiles(final String configFiles) {
final Configuration hbaseConfig = HBaseConfiguration.create();
if (StringUtils.isNotBlank(configFiles)) {
for (final String configFile : configFiles.split(",")) {
hbaseConfig.addResource(new Path(configFile.trim()));
}
}
return hbaseConfig;
}
@OnDisabled
public void shutdown() {
if (connection != null) {
try {
connection.close();
} catch (final Exception e) {
getLogger().warn("HBase connection close failed", e);
}
}
final KerberosUser kerberosUser = kerberosUserReference.get();
if (kerberosUser != null) {
try {
kerberosUser.logout();
} catch (final Exception e) {
getLogger().warn("KeberosUser Logout Failed", e);
} finally {
ugi = null;
kerberosUserReference.set(null);
}
}
}
protected List<Put> buildPuts(byte[] rowKey, List<PutColumn> columns) {
List<Put> retVal = new ArrayList<>();
try {
Put put = null;
for (final PutColumn column : columns) {
if (put == null || (put.getCellVisibility() == null && column.getVisibility() != null) || ( put.getCellVisibility() != null
&& !put.getCellVisibility().getExpression().equals(column.getVisibility())
)) {
put = new Put(rowKey);
if (column.getVisibility() != null) {
put.setCellVisibility(new CellVisibility(column.getVisibility()));
}
retVal.add(put);
}
if (column.getTimestamp() != null) {
put.addColumn(
column.getColumnFamily(),
column.getColumnQualifier(),
column.getTimestamp(),
column.getBuffer());
} else {
put.addColumn(
column.getColumnFamily(),
column.getColumnQualifier(),
column.getBuffer());
}
}
} catch (DeserializationException de) {
getLogger().error("Error writing cell visibility statement.", de);
throw new RuntimeException(de);
}
return retVal;
}
@Override
public void put(final String tableName, final Collection<PutFlowFile> puts) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
// Create one Put per row....
final Map<String, List<PutColumn>> sorted = new HashMap<>();
final List<Put> newPuts = new ArrayList<>();
for (final PutFlowFile putFlowFile : puts) {
final String rowKeyString = new String(putFlowFile.getRow(), StandardCharsets.UTF_8);
final List<PutColumn> columns = sorted.computeIfAbsent(rowKeyString, k -> new ArrayList<>());
columns.addAll(putFlowFile.getColumns());
}
for (final Map.Entry<String, List<PutColumn>> entry : sorted.entrySet()) {
newPuts.addAll(buildPuts(entry.getKey().getBytes(StandardCharsets.UTF_8), entry.getValue()));
}
table.put(newPuts);
}
return null;
});
}
@Override
public void put(final String tableName, final byte[] rowId, final Collection<PutColumn> columns) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
table.put(buildPuts(rowId, new ArrayList<>(columns)));
}
return null;
});
}
@Override
public boolean checkAndPut(final String tableName, final byte[] rowId, final byte[] family, final byte[] qualifier, final byte[] value, final PutColumn column) throws IOException {
return SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
final Put put = new Put(rowId);
put.addColumn(
column.getColumnFamily(),
column.getColumnQualifier(),
column.getBuffer());
final CheckAndMutate checkAndMutate = CheckAndMutate.newBuilder(rowId)
.ifEquals(family, qualifier, value)
.build(put);
final CheckAndMutateResult result = table.checkAndMutate(checkAndMutate);
return result.isSuccess();
}
});
}
@Override
public void delete(final String tableName, final byte[] rowId) throws IOException {
delete(tableName, rowId, null);
}
@Override
public void delete(String tableName, byte[] rowId, String visibilityLabel) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
Delete delete = new Delete(rowId);
if (!StringUtils.isEmpty(visibilityLabel)) {
delete.setCellVisibility(new CellVisibility(visibilityLabel));
}
table.delete(delete);
}
return null;
});
}
@Override
public void delete(final String tableName, final List<byte[]> rowIds) throws IOException {
delete(tableName, rowIds, null);
}
@Override
public void deleteCells(final String tableName, final List<DeleteRequest> deletes) throws IOException {
final List<Delete> deleteRequests = new ArrayList<>();
for (final DeleteRequest req : deletes) {
final Delete delete = new Delete(req.getRowId())
.addColumn(req.getColumnFamily(), req.getColumnQualifier());
if (!StringUtils.isEmpty(req.getVisibilityLabel())) {
delete.setCellVisibility(new CellVisibility(req.getVisibilityLabel()));
}
deleteRequests.add(delete);
}
batchDelete(tableName, deleteRequests);
}
@Override
public void delete(String tableName, List<byte[]> rowIds, String visibilityLabel) throws IOException {
final List<Delete> deletes = new ArrayList<>();
for (final byte[] rowId : rowIds) {
final Delete delete = new Delete(rowId);
if (!StringUtils.isBlank(visibilityLabel)) {
delete.setCellVisibility(new CellVisibility(visibilityLabel));
}
deletes.add(delete);
}
batchDelete(tableName, deletes);
}
private void batchDelete(String tableName, List<Delete> deletes) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
table.delete(deletes);
}
return null;
});
}
@Override
public void scan(final String tableName, final Collection<Column> columns, final String filterExpression, final long minTime, final ResultHandler handler)
throws IOException {
scan(tableName, columns, filterExpression, minTime, null, handler);
}
@Override
public void scan(String tableName, Collection<Column> columns, String filterExpression, long minTime, List<String> visibilityLabels, ResultHandler handler) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
Filter filter = null;
if (!StringUtils.isBlank(filterExpression)) {
ParseFilter parseFilter = new ParseFilter();
filter = parseFilter.parseFilterString(filterExpression);
}
try (final Table table = connection.getTable(TableName.valueOf(tableName));
final ResultScanner scanner = getResults(table, columns, filter, minTime, visibilityLabels)) {
for (final Result result : scanner) {
final byte[] rowKey = result.getRow();
final Cell[] cells = result.rawCells();
if (cells == null) {
continue;
}
// convert HBase cells to NiFi cells
final ResultCell[] resultCells = new ResultCell[cells.length];
for (int i = 0; i < cells.length; i++) {
final Cell cell = cells[i];
final ResultCell resultCell = getResultCell(cell);
resultCells[i] = resultCell;
}
// delegate to the handler
handler.handle(rowKey, resultCells);
}
}
return null;
});
}
@Override
public void scan(final String tableName, final byte[] startRow, final byte[] endRow, final Collection<Column> columns, List<String> authorizations, final ResultHandler handler)
throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName));
final ResultScanner scanner = getResults(table, startRow, endRow, columns, authorizations)) {
for (final Result result : scanner) {
final byte[] rowKey = result.getRow();
final Cell[] cells = result.rawCells();
if (cells == null) {
continue;
}
// convert HBase cells to NiFi cells
final ResultCell[] resultCells = new ResultCell[cells.length];
for (int i = 0; i < cells.length; i++) {
final Cell cell = cells[i];
final ResultCell resultCell = getResultCell(cell);
resultCells[i] = resultCell;
}
// delegate to the handler
handler.handle(rowKey, resultCells);
}
}
return null;
});
}
@Override
public void scan(final String tableName, final String startRow, final String endRow, String filterExpression,
final Long timerangeMin, final Long timerangeMax, final Integer limitRows, final Boolean isReversed,
final Boolean blockCache, final Collection<Column> columns, List<String> visibilityLabels, final ResultHandler handler) throws IOException {
SecurityUtil.callWithUgi(getUgi(), () -> {
try (final Table table = connection.getTable(TableName.valueOf(tableName));
final ResultScanner scanner = getResults(table, startRow, endRow, filterExpression, timerangeMin,
timerangeMax, isReversed, blockCache, columns, visibilityLabels)) {
int cnt = 0;
final int lim = limitRows != null ? limitRows : 0;
for (final Result result : scanner) {
if (lim > 0 && ++cnt > lim) {
break;
}
final byte[] rowKey = result.getRow();
final Cell[] cells = result.rawCells();
if (cells == null) {
continue;
}
// convert HBase cells to NiFi cells
final ResultCell[] resultCells = new ResultCell[cells.length];
for (int i = 0; i < cells.length; i++) {
final Cell cell = cells[i];
final ResultCell resultCell = getResultCell(cell);
resultCells[i] = resultCell;
}
// delegate to the handler
handler.handle(rowKey, resultCells);
}
}
return null;
});
}
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final String startRow, final String endRow, final String filterExpression, final Long timerangeMin, final Long timerangeMax,
final Boolean isReversed, final Boolean blockCache, final Collection<Column> columns, List<String> authorizations) throws IOException {
Scan scan = new Scan();
if (!StringUtils.isBlank(startRow)) {
scan = scan.withStartRow(startRow.getBytes(StandardCharsets.UTF_8));
}
if (!StringUtils.isBlank(endRow)) {
byte[] endRowBytes = endRow.getBytes(StandardCharsets.UTF_8);
if (endRow.equals(startRow)) {
scan = scan.withStopRow(endRowBytes, true);
} else {
scan = scan.withStopRow(endRowBytes, false);
}
}
if (authorizations != null && authorizations.size() > 0) {
scan.setAuthorizations(new Authorizations(authorizations));
}
Filter filter = null;
if (columns != null) {
for (Column col : columns) {
if (col.getQualifier() == null) {
scan.addFamily(col.getFamily());
} else {
scan.addColumn(col.getFamily(), col.getQualifier());
}
}
}
if (!StringUtils.isBlank(filterExpression)) {
ParseFilter parseFilter = new ParseFilter();
filter = parseFilter.parseFilterString(filterExpression);
}
if (filter != null) {
scan.setFilter(filter);
}
if (timerangeMin != null && timerangeMax != null) {
scan.setTimeRange(timerangeMin, timerangeMax);
}
// Row limiting is currently enforced by the caller while iterating the ResultScanner;
// the HBase 2 client also supports scan.setLimit(limitRows) if the limit is passed down.
//if (limitRows != null && limitRows > 0){
//  scan.setLimit(limitRows)
//}
if (isReversed != null) {
scan.setReversed(isReversed);
}
scan.setCacheBlocks(blockCache);
return table.getScanner(scan);
}
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final byte[] startRow, final byte[] endRow, final Collection<Column> columns, List<String> authorizations) throws IOException {
Scan scan = new Scan();
scan = scan.withStartRow(startRow);
if (Arrays.equals(startRow, endRow)) {
scan = scan.withStopRow(endRow, true);
} else {
scan = scan.withStopRow(endRow, false);
}
if (authorizations != null && authorizations.size() > 0) {
scan.setAuthorizations(new Authorizations(authorizations));
}
if (columns != null && columns.size() > 0) {
for (Column col : columns) {
if (col.getQualifier() == null) {
scan.addFamily(col.getFamily());
} else {
scan.addColumn(col.getFamily(), col.getQualifier());
}
}
}
return table.getScanner(scan);
}
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final Collection<Column> columns, final Filter filter, final long minTime, List<String> authorizations) throws IOException {
// Create a new scan. We will set the min timerange as the latest timestamp that
// we have seen so far. The minimum timestamp is inclusive, so we will get duplicates.
// We will record any cells that have the latest timestamp, so that when we scan again,
// we know to throw away those duplicates.
final Scan scan = new Scan();
scan.setTimeRange(minTime, Long.MAX_VALUE);
if (authorizations != null && authorizations.size() > 0) {
scan.setAuthorizations(new Authorizations(authorizations));
}
if (filter != null) {
scan.setFilter(filter);
}
if (columns != null) {
for (Column col : columns) {
if (col.getQualifier() == null) {
scan.addFamily(col.getFamily());
} else {
scan.addColumn(col.getFamily(), col.getQualifier());
}
}
}
return table.getScanner(scan);
}
private ResultCell getResultCell(Cell cell) {
final ResultCell resultCell = new ResultCell();
resultCell.setRowArray(cell.getRowArray());
resultCell.setRowOffset(cell.getRowOffset());
resultCell.setRowLength(cell.getRowLength());
resultCell.setFamilyArray(cell.getFamilyArray());
resultCell.setFamilyOffset(cell.getFamilyOffset());
resultCell.setFamilyLength(cell.getFamilyLength());
resultCell.setQualifierArray(cell.getQualifierArray());
resultCell.setQualifierOffset(cell.getQualifierOffset());
resultCell.setQualifierLength(cell.getQualifierLength());
resultCell.setTimestamp(cell.getTimestamp());
final Cell.Type cellType = cell.getType();
if (cellType != null) {
resultCell.setTypeByte(cellType.getCode());
}
resultCell.setValueArray(cell.getValueArray());
resultCell.setValueOffset(cell.getValueOffset());
resultCell.setValueLength(cell.getValueLength());
return resultCell;
}
@Override
public List<HBaseRegion> listHBaseRegions(final String tableName) throws HBaseClientException {
if (connection == null || connection.isClosed() || connection.isAborted()) {
final String errorMsg = String.format(
"Unable to fetch regions for table %s since there is no active connection to HBase.",
tableName
);
throw new IllegalStateException(errorMsg);
}
try {
final List<RegionInfo> regionInfos = connection.getAdmin().getRegions(TableName.valueOf(tableName));
// maps to the NiFi HBaseRegion object
final List<HBaseRegion> regions = regionInfos.stream()
.map(regionInfo ->
new HBaseRegion(
regionInfo.getStartKey(),
regionInfo.getEndKey(),
regionInfo.getRegionNameAsString(),
regionInfo.getRegionId(),
regionInfo.isDegenerate()
)
)
.collect(Collectors.toList());
return regions;
} catch (final IOException e) {
logger.error("Encountered error while communicating with HBase.", e);
throw new HBaseClientException(e);
}
}
static protected class ValidationResources {
private final String configResources;
private final Configuration configuration;
public ValidationResources(final String configResources, final Configuration configuration) {
this.configResources = configResources;
this.configuration = configuration;
}
public String getConfigResources() {
return configResources;
}
public Configuration getConfiguration() {
return configuration;
}
}
@Override
public byte[] toBytes(boolean b) {
return Bytes.toBytes(b);
}
@Override
public byte[] toBytes(float f) {
return Bytes.toBytes(f);
}
@Override
public byte[] toBytes(int i) {
return Bytes.toBytes(i);
}
@Override
public byte[] toBytes(long l) {
return Bytes.toBytes(l);
}
@Override
public byte[] toBytes(double d) {
return Bytes.toBytes(d);
}
@Override
public byte[] toBytes(String s) {
return Bytes.toBytes(s);
}
@Override
public byte[] toBytesBinary(String s) {
return Bytes.toBytesBinary(s);
}
@Override
public String toTransitUri(String tableName, String rowKey) {
if (connection == null) {
logger.warn("Connection has not been established, could not create a transit URI. Returning null.");
return null;
}
final String transitUriMasterAddress = StringUtils.isEmpty(masterAddress) ? "unknown" : masterAddress;
return "hbase://" + transitUriMasterAddress + "/" + tableName + (StringUtils.isEmpty(rowKey) ? "" : "/" + rowKey);
}
UserGroupInformation getUgi() throws IOException {
getLogger().trace("getting UGI instance");
// if there is a KerberosUser associated with UGI, call checkTGTAndRelogin to ensure UGI's underlying Subject has a valid ticket
SecurityUtil.checkTGTAndRelogin(getLogger(), kerberosUserReference.get());
return ugi;
}
}
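A hedged sketch of how a processor might write one row through the client service above; clientService is an assumed, already-enabled reference, and the table name, family, qualifier, and value are illustrative. Each row's columns are grouped per visibility expression by buildPuts before the HBase Put operations are issued.

import org.apache.nifi.hbase.HBaseClientService;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

class ClientServicePutSketch {
    static void writeRow(final HBaseClientService clientService) throws IOException {
        final byte[] row = "user-42".getBytes(StandardCharsets.UTF_8);
        final PutColumn nameColumn = new PutColumn(
                "f".getBytes(StandardCharsets.UTF_8),      // column family
                "name".getBytes(StandardCharsets.UTF_8),   // column qualifier
                "Alice".getBytes(StandardCharsets.UTF_8),  // cell value
                (String) null);                            // no visibility expression
        // One PutFlowFile per logical row; the FlowFile reference may be null for service-level callers
        clientService.put("users", List.of(new PutFlowFile("users", row, List.of(nameColumn), null)));
    }
}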

View File

@ -1,115 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.lookup.LookupFailureException;
import org.apache.nifi.lookup.LookupService;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
public class HBase_2_ListLookupService extends AbstractHBaseLookupService implements LookupService<List> {
public static final AllowableValue KEY_LIST = new AllowableValue("key_list", "List of keys",
"Return the row as a list of the column qualifiers (keys)");
public static final AllowableValue VALUE_LIST = new AllowableValue("value_list", "List of values",
"Return the row as a list of the values associated with each column qualifier.");
public static final PropertyDescriptor RETURN_TYPE = new PropertyDescriptor.Builder()
.name("hb-lu-list-return-type")
.displayName("Return Type")
.description("Choose whether to return a list of the keys or a list of the values for the supplied row key.")
.allowableValues(KEY_LIST, VALUE_LIST)
.defaultValue(KEY_LIST.getValue())
.required(true)
.addValidator(Validator.VALID)
.build();
public static final List<PropertyDescriptor> _PROPERTIES;
static {
List<PropertyDescriptor> _temp = new ArrayList<>();
_temp.addAll(PROPERTIES);
_temp.add(RETURN_TYPE);
_PROPERTIES = Collections.unmodifiableList(_temp);
}
@Override
public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return _PROPERTIES;
}
@Override
public Optional<List> lookup(Map<String, Object> coordinates) throws LookupFailureException {
if (coordinates.get(ROW_KEY_KEY) == null) {
return Optional.empty();
}
final String rowKey = coordinates.get(ROW_KEY_KEY).toString();
if (StringUtils.isBlank(rowKey)) {
return Optional.empty();
}
final byte[] rowKeyBytes = rowKey.getBytes(StandardCharsets.UTF_8);
try {
final Map<String, Object> values = scan(rowKeyBytes);
if (values.size() > 0) {
List<String> retVal = returnType.equals(KEY_LIST.getValue())
? new ArrayList<>(values.keySet())
: values.values().stream().map( obj -> obj.toString() ).collect(Collectors.toList());
return Optional.ofNullable(retVal);
} else {
return Optional.empty();
}
} catch (IOException e) {
getLogger().error("Error occurred loading {}", coordinates.get("rowKey"), e);
throw new LookupFailureException(e);
}
}
private String returnType;
@OnEnabled
public void onEnabled(ConfigurationContext context) throws InterruptedException, IOException, InitializationException {
super.onEnabled(context);
returnType = context.getProperty(RETURN_TYPE).getValue();
}
@Override
public Class<?> getValueType() {
return List.class;
}
@Override
public Set<String> getRequiredKeys() {
return REQUIRED_KEYS;
}
}

View File

@ -1,85 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.lookup.LookupFailureException;
import org.apache.nifi.lookup.LookupService;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
@Tags({"hbase", "record", "lookup", "service"})
@CapabilityDescription("A lookup service that retrieves one or more columns from HBase and returns them as a record. The lookup coordinates " +
"must contain 'rowKey' which will be the HBase row id.")
public class HBase_2_RecordLookupService extends AbstractHBaseLookupService implements LookupService<Record> {
@Override
public Optional<Record> lookup(Map<String, Object> coordinates) throws LookupFailureException {
if (coordinates.get(ROW_KEY_KEY) == null) {
return Optional.empty();
}
final String rowKey = coordinates.get(ROW_KEY_KEY).toString();
if (StringUtils.isBlank(rowKey)) {
return Optional.empty();
}
final byte[] rowKeyBytes = rowKey.getBytes(StandardCharsets.UTF_8);
try {
final Map<String, Object> values = scan(rowKeyBytes);
if (values.size() > 0) {
final List<RecordField> fields = new ArrayList<>();
for (String key : values.keySet()) {
fields.add(new RecordField(key, RecordFieldType.STRING.getDataType()));
}
final RecordSchema schema = new SimpleRecordSchema(fields);
return Optional.ofNullable(new MapRecord(schema, values));
} else {
return Optional.empty();
}
} catch (IOException e) {
getLogger().error("Error occurred loading {}", coordinates.get("rowKey"), e);
throw new LookupFailureException(e);
}
}
@Override
public Class<?> getValueType() {
return Record.class;
}
@Override
public Set<String> getRequiredKeys() {
return REQUIRED_KEYS;
}
}
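A hedged usage sketch of the record lookup above: the caller supplies the required "rowKey" coordinate and, when the row exists, each returned column qualifier becomes a String field on the record. The service reference, row key, and field name are illustrative only.

import org.apache.nifi.hbase.HBase_2_RecordLookupService;
import org.apache.nifi.lookup.LookupFailureException;
import org.apache.nifi.serialization.record.Record;
import java.util.Map;
import java.util.Optional;

class RecordLookupSketch {
    static void example(final HBase_2_RecordLookupService lookupService) throws LookupFailureException {
        final Map<String, Object> coordinates = Map.of("rowKey", "user-42");
        final Optional<Record> result = lookupService.lookup(coordinates);
        result.ifPresent(record -> System.out.println(record.getAsString("name")));
    }
}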

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
class VisibilityLabelUtils {
static final PropertyDescriptor AUTHORIZATIONS = new PropertyDescriptor.Builder()
.name("hb-lu-authorizations")
.displayName("Authorizations")
.description("The list of authorization tokens to be used with cell visibility if it is enabled. These will be used to " +
"override the default authorization list for the user accessing HBase.")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
static List<String> getAuthorizations(ConfigurationContext context) {
List<String> tokens = new ArrayList<>();
String authorizationString = context.getProperty(AUTHORIZATIONS).isSet()
? context.getProperty(AUTHORIZATIONS).getValue()
: "";
if (!StringUtils.isEmpty(authorizationString)) {
tokens = Arrays.asList(authorizationString.split(",[\\s]*"));
}
return tokens;
}
}
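A small illustration (not from the codebase) of the split above: the regex ",[\s]*" breaks the Authorizations property on a comma followed by optional whitespace, so inconsistent spacing still produces clean tokens.

import java.util.Arrays;
import java.util.List;

class AuthorizationSplitSketch {
    public static void main(String[] args) {
        // Hypothetical property value with mixed spacing after the commas
        final List<String> tokens = Arrays.asList("OPEN, SECRET,TOP_SECRET".split(",[\\s]*"));
        System.out.println(tokens); // prints [OPEN, SECRET, TOP_SECRET]
    }
}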

View File

@ -1,17 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.hbase.HBase_2_ClientService
org.apache.nifi.hbase.HBase_2_ClientMapCacheService
org.apache.nifi.hbase.HBase_2_RecordLookupService

View File

@ -1,211 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
* Override methods to create a mock service that can return staged data
*/
public class MockHBaseClientService extends HBase_2_ClientService {
private Table table;
private String family;
private Map<String, Result> results = new HashMap<>();
private UserGroupInformation mockUgi;
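// Instance initializer: stub UserGroupInformation so that doAs() simply runs the supplied privileged action on the calling thread.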
{
mockUgi = mock(UserGroupInformation.class);
try {
doAnswer(invocation -> {
PrivilegedExceptionAction<?> action = invocation.getArgument(0);
return action.run();
}).when(mockUgi).doAs(any(PrivilegedExceptionAction.class));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public MockHBaseClientService(final Table table, final String family) {
this.table = table;
this.family = family;
}
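// Stages a mocked Result for the given row key whose cells expose the supplied qualifier/value pairs under the configured column family.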
public void addResult(final String rowKey, final Map<String, String> cells, final long timestamp) {
final byte[] rowArray = rowKey.getBytes(StandardCharsets.UTF_8);
final Cell[] cellArray = new Cell[cells.size()];
int i = 0;
for (final Map.Entry<String, String> cellEntry : cells.entrySet()) {
final Cell cell = mock(Cell.class);
when(cell.getRowArray()).thenReturn(rowArray);
when(cell.getRowOffset()).thenReturn(0);
when(cell.getRowLength()).thenReturn((short) rowArray.length);
final String cellValue = cellEntry.getValue();
final byte[] valueArray = cellValue.getBytes(StandardCharsets.UTF_8);
when(cell.getValueArray()).thenReturn(valueArray);
when(cell.getValueOffset()).thenReturn(0);
when(cell.getValueLength()).thenReturn(valueArray.length);
final byte[] familyArray = family.getBytes(StandardCharsets.UTF_8);
when(cell.getFamilyArray()).thenReturn(familyArray);
when(cell.getFamilyOffset()).thenReturn(0);
when(cell.getFamilyLength()).thenReturn((byte) familyArray.length);
final String qualifier = cellEntry.getKey();
final byte[] qualifierArray = qualifier.getBytes(StandardCharsets.UTF_8);
when(cell.getQualifierArray()).thenReturn(qualifierArray);
when(cell.getQualifierOffset()).thenReturn(0);
when(cell.getQualifierLength()).thenReturn(qualifierArray.length);
when(cell.getTimestamp()).thenReturn(timestamp);
cellArray[i++] = cell;
}
final Result result = mock(Result.class);
when(result.getRow()).thenReturn(rowArray);
when(result.rawCells()).thenReturn(cellArray);
results.put(rowKey, result);
}
@Override
public void put(final String tableName, final byte[] rowId, final Collection<PutColumn> columns) throws IOException {
Put put = new Put(rowId);
Map<String, String> map = new HashMap<String, String>();
for (final PutColumn column : columns) {
put.addColumn(
column.getColumnFamily(),
column.getColumnQualifier(),
column.getBuffer());
map.put(new String(column.getColumnQualifier()), new String(column.getBuffer()));
}
table.put(put);
addResult(new String(rowId), map, 1);
}
@Override
public void put(final String tableName, final Collection<PutFlowFile> puts) throws IOException {
final Map<String, List<PutColumn>> sorted = new HashMap<>();
final List<Put> newPuts = new ArrayList<>();
for (final PutFlowFile putFlowFile : puts) {
Map<String, String> map = new HashMap<String, String>();
final String rowKeyString = new String(putFlowFile.getRow(), StandardCharsets.UTF_8);
List<PutColumn> columns = sorted.get(rowKeyString);
if (columns == null) {
columns = new ArrayList<>();
sorted.put(rowKeyString, columns);
}
columns.addAll(putFlowFile.getColumns());
for (PutColumn column : putFlowFile.getColumns()) {
map.put(new String(column.getColumnQualifier()), new String(column.getBuffer()));
}
addResult(new String(putFlowFile.getRow()), map, 1);
}
for (final Map.Entry<String, List<PutColumn>> entry : sorted.entrySet()) {
newPuts.addAll(buildPuts(entry.getKey().getBytes(StandardCharsets.UTF_8), entry.getValue()));
}
table.put(newPuts);
}
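// Simplified checkAndPut: refuse the write when a cell for the same family and qualifier already exists and the expected value is null or does not match; otherwise apply the put.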
@Override
public boolean checkAndPut(final String tableName, final byte[] rowId, final byte[] family, final byte[] qualifier, final byte[] value, final PutColumn column) throws IOException {
for (Result result : results.values()) {
if (Arrays.equals(result.getRow(), rowId)) {
Cell[] cellArray = result.rawCells();
for (Cell cell : cellArray) {
if (Arrays.equals(cell.getFamilyArray(), family) && Arrays.equals(cell.getQualifierArray(), qualifier)) {
if (value == null || !Arrays.equals(cell.getValueArray(), value)) {
return false;
}
}
}
}
}
final List<PutColumn> putColumns = new ArrayList<PutColumn>();
putColumns.add(column);
put(tableName, rowId, putColumns);
return true;
}
@Override
protected ResultScanner getResults(Table table, byte[] startRow, byte[] endRow, Collection<Column> columns, List<String> labels) throws IOException {
final ResultScanner scanner = mock(ResultScanner.class);
Mockito.when(scanner.iterator()).thenReturn(results.values().iterator());
return scanner;
}
@Override
protected ResultScanner getResults(Table table, Collection<Column> columns, Filter filter, long minTime, List<String> labels) throws IOException {
final ResultScanner scanner = mock(ResultScanner.class);
Mockito.when(scanner.iterator()).thenReturn(results.values().iterator());
return scanner;
}
protected ResultScanner getResults(final Table table, final String startRow, final String endRow, final String filterExpression, final Long timerangeMin, final Long timerangeMax,
final Integer limitRows, final Boolean isReversed, final Collection<Column> columns) throws IOException {
final ResultScanner scanner = mock(ResultScanner.class);
Mockito.when(scanner.iterator()).thenReturn(results.values().iterator());
return scanner;
}
@Override
protected Connection createConnection(ConfigurationContext context) throws IOException {
Connection connection = mock(Connection.class);
Mockito.when(connection.getTable(table.getName())).thenReturn(table);
return connection;
}
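// Hand back the stubbed UGI so service calls behave as if a Kerberos login had already completed.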
@Override
UserGroupInformation getUgi() throws IOException {
return mockUgi;
}
}

View File

@ -1,444 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.nifi.distributed.cache.client.AtomicCacheEntry;
import org.apache.nifi.distributed.cache.client.AtomicDistributedMapCacheClient;
import org.apache.nifi.distributed.cache.client.Deserializer;
import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient;
import org.apache.nifi.distributed.cache.client.Serializer;
import org.apache.nifi.distributed.cache.client.exception.DeserializationException;
import org.apache.nifi.distributed.cache.client.exception.SerializationException;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class TestHBase_2_ClientMapCacheService {
private Serializer<String> stringSerializer = new StringSerializer();
private Deserializer<String> stringDeserializer = new StringDeserializer();
@BeforeEach
public void setup() {
// needed for calls to UserGroupInformation.setConfiguration() to work when passing in
// config with Kerberos authentication enabled
System.setProperty("java.security.krb5.realm", "nifi.com");
System.setProperty("java.security.krb5.kdc", "nifi.kdc");
}
private final String tableName = "nifi";
private final String columnFamily = "family1";
private final String columnQualifier = "qualifier1";
@Test
public void testPut() throws InitializationException, IOException {
final String row = "row1";
final String content = "content1";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
// try to put a single cell
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
hBaseCacheService.put( row, content, stringSerializer, stringSerializer);
// verify only one call to put was made
ArgumentCaptor<Put> capture = ArgumentCaptor.forClass(Put.class);
verify(table, times(1)).put(capture.capture());
verifyPut(row, columnFamily, columnQualifier, content, capture.getValue());
}
@Test
public void testPutAll() throws InitializationException, IOException {
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
// put several entries in one putAll call
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
Map<String, String> putz = new HashMap<>();
List<String> content = new ArrayList<>();
List<String> rows = new ArrayList<>();
for (int x = 1; x <= 5; x++) {
putz.put(String.format("row-%d", x), String.format("content-%d", x));
content.add(String.format("content-%d", x));
rows.add(String.format("row-%d", x));
}
hBaseCacheService.putAll( putz, stringSerializer, stringSerializer);
// verify only one call to put was made
ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
verify(table, times(1)).put(capture.capture());
List<Put> captured = capture.getValue();
for (int x = 0; x < 5; x++) {
Put put = captured.get(x);
String row = new String(put.getRow());
assertTrue(rows.contains(row));
NavigableMap<byte[], List<Cell>> familyCells = put.getFamilyCellMap();
assertEquals(1, familyCells.size());
Map.Entry<byte[], List<Cell>> entry = familyCells.firstEntry();
assertEquals(columnFamily, new String(entry.getKey()));
assertEquals(1, entry.getValue().size());
Cell cell = entry.getValue().get(0);
String contentString = new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
assertEquals(columnQualifier, new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
assertTrue(content.contains(contentString));
content.remove(contentString);
rows.remove(row);
}
}
@Test
public void testGet() throws InitializationException, IOException {
final String row = "row1";
final String content = "content1";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
hBaseCacheService.put(row, content, stringSerializer, stringSerializer);
final String result = hBaseCacheService.get(row, stringSerializer, stringDeserializer);
assertEquals( content, result);
}
@Test
public void testContainsKey() throws InitializationException, IOException {
final String row = "row1";
final String content = "content1";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
assertFalse(hBaseCacheService.containsKey(row, stringSerializer));
hBaseCacheService.put(row, content, stringSerializer, stringSerializer);
assertTrue(hBaseCacheService.containsKey(row, stringSerializer));
}
@Test
public void testPutIfAbsent() throws InitializationException, IOException {
final String row = "row1";
final String content = "content1";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
assertTrue( hBaseCacheService.putIfAbsent( row, content, stringSerializer, stringSerializer));
// verify only one call to put was made
ArgumentCaptor<Put> capture = ArgumentCaptor.forClass(Put.class);
verify(table, times(1)).put(capture.capture());
verifyPut(row, columnFamily, columnQualifier, content, capture.getValue());
assertFalse(hBaseCacheService.putIfAbsent(row, content, stringSerializer, stringSerializer));
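// the second attempt must not write again, so the table still sees exactly one put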
verify(table, times(1)).put(capture.capture());
}
@Test
public void testGetAndPutIfAbsent() throws InitializationException, IOException {
final String row = "row1";
final String content = "content1";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(DistributedMapCacheClient.class);
assertNull( hBaseCacheService.getAndPutIfAbsent( row, content, stringSerializer, stringSerializer, stringDeserializer));
// verify only one call to put was made
ArgumentCaptor<Put> capture = ArgumentCaptor.forClass(Put.class);
verify(table, times(1)).put(capture.capture());
verifyPut(row, columnFamily, columnQualifier, content, capture.getValue());
final String result = hBaseCacheService.getAndPutIfAbsent(row, content, stringSerializer, stringSerializer, stringDeserializer);
verify(table, times(1)).put(capture.capture());
assertEquals(content, result);
}
@Test
public void testFetch() throws InitializationException, IOException {
final String key = "key1";
final String value = "value1";
final byte[] revision = value.getBytes();
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final AtomicDistributedMapCacheClient<byte[]> cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final AtomicDistributedMapCacheClient<byte[]> hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(AtomicDistributedMapCacheClient.class);
hBaseCacheService.put(key, value, stringSerializer, stringSerializer);
final AtomicCacheEntry<String, String, byte[]> atomicCacheEntry = hBaseCacheService.fetch(key, stringSerializer, stringDeserializer);
assertEquals(key, atomicCacheEntry.getKey());
assertEquals(value, atomicCacheEntry.getValue());
assertArrayEquals(revision, atomicCacheEntry.getRevision().get());
}
@Test
public void testReplace() throws InitializationException, IOException {
final String key = "key1";
final String value = "value1";
final byte[] revision = value.getBytes();
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
final AtomicDistributedMapCacheClient<byte[]> cacheService = configureHBaseCacheService(runner, hBaseClientService);
runner.assertValid(cacheService);
final AtomicDistributedMapCacheClient<byte[]> hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE)
.asControllerService(AtomicDistributedMapCacheClient.class);
// First time value should not already be in cache so this should return true
final boolean newResult = hBaseCacheService.replace(new AtomicCacheEntry(key, value, null), stringSerializer, stringSerializer);
assertTrue(newResult);
// Second time the entry already exists and the supplied revision matches, so the replace should still succeed
final boolean existingResult = hBaseCacheService.replace(new AtomicCacheEntry(key, value, revision), stringSerializer, stringSerializer);
assertTrue(existingResult);
// Third time we're replacing with a new value so this should return true
final boolean replaceResult = hBaseCacheService.replace(new AtomicCacheEntry(key, "value2", revision), stringSerializer, stringSerializer);
assertTrue(replaceResult);
}
private MockHBaseClientService configureHBaseClientService(final TestRunner runner, final Table table) throws InitializationException {
final MockHBaseClientService service = new MockHBaseClientService(table, "family1");
runner.addControllerService("hbaseClient", service);
runner.setProperty(service, HBase_2_ClientService.HADOOP_CONF_FILES, "src/test/resources/hbase-site.xml");
runner.enableControllerService(service);
runner.setProperty(TestProcessor.HBASE_CLIENT_SERVICE, "hbaseClient");
return service;
}
private AtomicDistributedMapCacheClient<byte[]> configureHBaseCacheService(final TestRunner runner, final HBaseClientService service) throws InitializationException {
final HBase_2_ClientMapCacheService cacheService = new HBase_2_ClientMapCacheService();
runner.addControllerService("hbaseCache", cacheService);
runner.setProperty(cacheService, HBase_2_ClientMapCacheService.HBASE_CLIENT_SERVICE, "hbaseClient");
runner.setProperty(cacheService, HBase_2_ClientMapCacheService.HBASE_CACHE_TABLE_NAME, tableName);
runner.setProperty(cacheService, HBase_2_ClientMapCacheService.HBASE_COLUMN_FAMILY, columnFamily);
runner.setProperty(cacheService, HBase_2_ClientMapCacheService.HBASE_COLUMN_QUALIFIER, columnQualifier);
runner.enableControllerService(cacheService);
runner.setProperty(TestProcessor.HBASE_CACHE_SERVICE, "hbaseCache");
return cacheService;
}
private void verifyResultCell(final ResultCell result, final String cf, final String cq, final String val) {
final String colFamily = new String(result.getFamilyArray(), result.getFamilyOffset(), result.getFamilyLength());
assertEquals(cf, colFamily);
final String colQualifier = new String(result.getQualifierArray(), result.getQualifierOffset(), result.getQualifierLength());
assertEquals(cq, colQualifier);
final String value = new String(result.getValueArray(), result.getValueOffset(), result.getValueLength());
assertEquals(val, value);
}
private void verifyPut(String row, String columnFamily, String columnQualifier, String content, Put put) {
assertEquals(row, new String(put.getRow()));
NavigableMap<byte[], List<Cell>> familyCells = put.getFamilyCellMap();
assertEquals(1, familyCells.size());
Map.Entry<byte[], List<Cell>> entry = familyCells.firstEntry();
assertEquals(columnFamily, new String(entry.getKey()));
assertEquals(1, entry.getValue().size());
Cell cell = entry.getValue().get(0);
assertEquals(columnQualifier, new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
assertEquals(content, new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
}
private static class StringSerializer implements Serializer<String> {
@Override
public void serialize(final String value, final OutputStream out) throws SerializationException, IOException {
out.write(value.getBytes(StandardCharsets.UTF_8));
}
}
private static class StringDeserializer implements Deserializer<String> {
@Override
public String deserialize(byte[] input) throws DeserializationException, IOException {
return new String(input);
}
}
}

View File

@ -1,334 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultCell;
import org.apache.nifi.hbase.scan.ResultHandler;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class TestHBase_2_ClientService {
static final String COL_FAM = "nifi1";
@BeforeEach
public void setup() {
// needed for calls to UserGroupInformation.setConfiguration() to work when passing in
// config with Kerberos authentication enabled
System.setProperty("java.security.krb5.realm", "nifi.com");
System.setProperty("java.security.krb5.kdc", "nifi.kdc");
}
@Test
public void testSinglePut() throws InitializationException, IOException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final String content = "content1";
final Collection<PutColumn> columns = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8),
content.getBytes(StandardCharsets.UTF_8)));
final PutFlowFile putFlowFile = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns, null);
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final HBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// try to put a single cell
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
hBaseClientService.put(tableName, Arrays.asList(putFlowFile));
// verify only one call to put was made
ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
verify(table, times(1)).put(capture.capture());
// verify only one put was in the list of puts
final List<Put> puts = capture.getValue();
assertEquals(1, puts.size());
verifyPut(row, columnFamily, columnQualifier, content, puts.get(0));
}
@Test
public void testMultiplePutsSameRow() throws IOException, InitializationException {
final String tableName = "nifi";
final String row = "row1";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final String content1 = "content1";
final String content2 = "content2";
final Collection<PutColumn> columns1 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8),
content1.getBytes(StandardCharsets.UTF_8)));
final PutFlowFile putFlowFile1 = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns1, null);
final Collection<PutColumn> columns2 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8),
content2.getBytes(StandardCharsets.UTF_8)));
final PutFlowFile putFlowFile2 = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns2, null);
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final HBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// try to put multiple cells for the same row
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
hBaseClientService.put(tableName, Arrays.asList(putFlowFile1, putFlowFile2));
// verify put was only called once
ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
verify(table, times(1)).put(capture.capture());
// verify there was only one put in the list of puts
final List<Put> puts = capture.getValue();
assertEquals(1, puts.size());
// verify two cells were added to this one put operation
final NavigableMap<byte[], List<Cell>> familyCells = puts.get(0).getFamilyCellMap();
Map.Entry<byte[], List<Cell>> entry = familyCells.firstEntry();
assertEquals(2, entry.getValue().size());
}
@Test
public void testMultiplePutsDifferentRow() throws IOException, InitializationException {
final String tableName = "nifi";
final String row1 = "row1";
final String row2 = "row2";
final String columnFamily = "family1";
final String columnQualifier = "qualifier1";
final String content1 = "content1";
final String content2 = "content2";
final Collection<PutColumn> columns1 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8),
content1.getBytes(StandardCharsets.UTF_8)));
final PutFlowFile putFlowFile1 = new PutFlowFile(tableName, row1.getBytes(StandardCharsets.UTF_8), columns1, null);
final Collection<PutColumn> columns2 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8),
columnQualifier.getBytes(StandardCharsets.UTF_8),
content2.getBytes(StandardCharsets.UTF_8)));
final PutFlowFile putFlowFile2 = new PutFlowFile(tableName, row2.getBytes(StandardCharsets.UTF_8), columns2, null);
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final HBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// try to put multiple cells for different rows
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
hBaseClientService.put(tableName, Arrays.asList(putFlowFile1, putFlowFile2));
// verify put was only called once
ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
verify(table, times(1)).put(capture.capture());
// verify there were two puts in the list
final List<Put> puts = capture.getValue();
assertEquals(2, puts.size());
}
@Test
public void testScan() throws InitializationException, IOException {
final String tableName = "nifi";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// stage some results in the mock service...
final long now = System.currentTimeMillis();
final Map<String, String> cells = new LinkedHashMap<>();
cells.put("greeting", "hello");
cells.put("name", "nifi");
service.addResult("row0", cells, now - 2);
service.addResult("row1", cells, now - 1);
service.addResult("row2", cells, now - 1);
service.addResult("row3", cells, now);
// perform a scan and verify the four rows were returned
final CollectingResultHandler handler = new CollectingResultHandler();
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
hBaseClientService.scan(tableName, new ArrayList<Column>(), null, now, handler);
assertEquals(4, handler.results.size());
// get row0 using the row id and verify it has 2 cells
final ResultCell[] results = handler.results.get("row0");
assertNotNull(results);
assertEquals(2, results.length);
verifyResultCell(results[0], COL_FAM, "greeting", "hello");
verifyResultCell(results[1], COL_FAM, "name", "nifi");
}
@Test
public void testScanWithValidFilter() throws InitializationException, IOException {
final String tableName = "nifi";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// perform a scan using a valid filter expression
final CollectingResultHandler handler = new CollectingResultHandler();
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
// make sure we parse the filter expression without throwing an exception
final String filter = "PrefixFilter ('Row') AND PageFilter (1) AND FirstKeyOnlyFilter ()";
hBaseClientService.scan(tableName, new ArrayList<Column>(), filter, System.currentTimeMillis(), handler);
}
@Test
public void testScanWithInvalidFilter() throws InitializationException {
final String tableName = "nifi";
final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
// Mock an HBase Table so we can verify the put operations later
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(tableName));
// create the controller service and link it to the test processor
final MockHBaseClientService service = configureHBaseClientService(runner, table);
runner.assertValid(service);
// attempt a scan using an invalid filter expression
final CollectingResultHandler handler = new CollectingResultHandler();
final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE)
.asControllerService(HBaseClientService.class);
// this should throw IllegalArgumentException
final String filter = "this is not a filter";
assertThrows(IllegalArgumentException.class,
() -> hBaseClientService.scan(tableName, new ArrayList<Column>(), filter, System.currentTimeMillis(), handler));
}
private MockHBaseClientService configureHBaseClientService(final TestRunner runner, final Table table) throws InitializationException {
final MockHBaseClientService service = new MockHBaseClientService(table, COL_FAM);
runner.addControllerService("hbaseClient", service);
runner.setProperty(service, HBase_2_ClientService.HADOOP_CONF_FILES, "src/test/resources/hbase-site.xml");
runner.enableControllerService(service);
runner.setProperty(TestProcessor.HBASE_CLIENT_SERVICE, "hbaseClient");
return service;
}
private void verifyResultCell(final ResultCell result, final String cf, final String cq, final String val) {
final String colFamily = new String(result.getFamilyArray(), result.getFamilyOffset(), result.getFamilyLength());
assertEquals(cf, colFamily);
final String colQualifier = new String(result.getQualifierArray(), result.getQualifierOffset(), result.getQualifierLength());
assertEquals(cq, colQualifier);
final String value = new String(result.getValueArray(), result.getValueOffset(), result.getValueLength());
assertEquals(val, value);
}
private void verifyPut(String row, String columnFamily, String columnQualifier, String content, Put put) {
assertEquals(row, new String(put.getRow()));
NavigableMap<byte[], List<Cell>> familyCells = put.getFamilyCellMap();
assertEquals(1, familyCells.size());
Map.Entry<byte[], List<Cell>> entry = familyCells.firstEntry();
assertEquals(columnFamily, new String(entry.getKey()));
assertEquals(1, entry.getValue().size());
Cell cell = entry.getValue().get(0);
assertEquals(columnQualifier, new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()));
assertEquals(content, new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
}
// handler that saves results for verification
private static final class CollectingResultHandler implements ResultHandler {
Map<String, ResultCell[]> results = new LinkedHashMap<>();
@Override
public void handle(byte[] row, ResultCell[] resultCells) {
final String rowStr = new String(row, StandardCharsets.UTF_8);
results.put(rowStr, resultCells);
}
}
}

View File

@ -1,124 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Table;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
public class TestHBase_2_ListLookupService {
static final String TABLE_NAME = "guids";
private TestRunner runner;
private HBase_2_ListLookupService lookupService;
private MockHBaseClientService clientService;
private NoOpProcessor processor;
@BeforeEach
public void before() throws Exception {
processor = new NoOpProcessor();
runner = TestRunners.newTestRunner(processor);
// setup mock HBaseClientService
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(TABLE_NAME));
clientService = new MockHBaseClientService(table, "family");
runner.addControllerService("clientService", clientService);
runner.setProperty(clientService, HBase_2_ClientService.HADOOP_CONF_FILES, "src/test/resources/hbase-site.xml");
runner.enableControllerService(clientService);
// setup HBase LookupService
lookupService = new HBase_2_ListLookupService();
runner.addControllerService("lookupService", lookupService);
runner.setProperty(lookupService, HBase_2_ListLookupService.HBASE_CLIENT_SERVICE, "clientService");
runner.setProperty(lookupService, HBase_2_RecordLookupService.TABLE_NAME, TABLE_NAME);
runner.enableControllerService(lookupService);
}
private Optional<List> setupAndRun() throws Exception {
// setup some staged data in the mock client service
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "v1");
cells.put("cq2", "v2");
clientService.addResult("row1", cells, System.currentTimeMillis());
Map<String, Object> lookup = new HashMap<>();
lookup.put("rowKey", "row1");
return lookupService.lookup(lookup);
}
@Test
public void testLookupKeyList() throws Exception {
Optional<List> results = setupAndRun();
assertTrue(results.isPresent());
List result = results.get();
assertEquals(2, result.size());
assertTrue(result.contains("cq1"));
assertTrue(result.contains("cq2"));
}
@Test
public void testLookupValueList() throws Exception {
runner.disableControllerService(lookupService);
runner.setProperty(lookupService, HBase_2_ListLookupService.RETURN_TYPE, HBase_2_ListLookupService.VALUE_LIST);
runner.enableControllerService(lookupService);
Optional<List> results = setupAndRun();
assertTrue(results.isPresent());
List result = results.get();
assertEquals(2, result.size());
assertTrue(result.contains("v1"));
assertTrue(result.contains("v2"));
}
// Processor that does nothing just so we can create a TestRunner
private static class NoOpProcessor extends AbstractProcessor {
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return Collections.emptyList();
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
}
}
}

View File

@ -1,120 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Table;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.mockito.Mockito.when;
public class TestHBase_2_RecordLookupService {
static final String TABLE_NAME = "guids";
static final String ROW = "row1";
private TestRunner runner;
private HBase_2_RecordLookupService lookupService;
private MockHBaseClientService clientService;
private TestRecordLookupProcessor testLookupProcessor;
@BeforeEach
public void before() throws Exception {
testLookupProcessor = new TestRecordLookupProcessor();
runner = TestRunners.newTestRunner(testLookupProcessor);
// setup mock HBaseClientService
final Table table = Mockito.mock(Table.class);
when(table.getName()).thenReturn(TableName.valueOf(TABLE_NAME));
clientService = new MockHBaseClientService(table, "family");
runner.addControllerService("clientService", clientService);
runner.setProperty(clientService, HBase_2_ClientService.HADOOP_CONF_FILES, "src/test/resources/hbase-site.xml");
runner.enableControllerService(clientService);
// setup HBase LookupService
lookupService = new HBase_2_RecordLookupService();
runner.addControllerService("lookupService", lookupService);
runner.setProperty(lookupService, HBase_2_RecordLookupService.HBASE_CLIENT_SERVICE, "clientService");
runner.setProperty(lookupService, HBase_2_RecordLookupService.TABLE_NAME, TABLE_NAME);
runner.enableControllerService(lookupService);
// setup test processor
runner.setProperty(TestRecordLookupProcessor.HBASE_LOOKUP_SERVICE, "lookupService");
runner.setProperty(TestRecordLookupProcessor.HBASE_ROW, ROW);
}
@Test
public void testSuccessfulLookupAllColumns() {
// setup some staged data in the mock client service
final Map<String, String> cells = new HashMap<>();
cells.put("cq1", "v1");
cells.put("cq2", "v2");
clientService.addResult("row1", cells, System.currentTimeMillis());
// run the processor
runner.enqueue("trigger flow file");
runner.run();
runner.assertAllFlowFilesTransferred(TestRecordLookupProcessor.REL_SUCCESS);
final List<Record> records = testLookupProcessor.getLookedupRecords();
assertNotNull(records);
assertEquals(1, records.size());
final Record record = records.get(0);
assertEquals("v1", record.getAsString("cq1"));
assertEquals("v2", record.getAsString("cq2"));
}
@Test
public void testLookupWithNoResults() {
// run the processor
runner.enqueue("trigger flow file");
runner.run();
runner.assertAllFlowFilesTransferred(TestRecordLookupProcessor.REL_FAILURE);
final List<Record> records = testLookupProcessor.getLookedupRecords();
assertNotNull(records);
assertEquals(0, records.size());
}
@Test
public void testLookupWhenMissingRowKeyCoordinate() {
runner.removeProperty(TestRecordLookupProcessor.HBASE_ROW);
// run the processor
runner.enqueue("trigger flow file");
runner.run();
runner.assertAllFlowFilesTransferred(TestRecordLookupProcessor.REL_FAILURE);
final List<Record> records = testLookupProcessor.getLookedupRecords();
assertNotNull(records);
assertEquals(0, records.size());
}
}

View File

@ -1,55 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.distributed.cache.client.DistributedMapCacheClient;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.exception.ProcessException;
import java.util.ArrayList;
import java.util.List;
public class TestProcessor extends AbstractProcessor {
static final PropertyDescriptor HBASE_CLIENT_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Client Service")
.description("HBaseClientService")
.identifiesControllerService(HBaseClientService.class)
.required(true)
.build();
static final PropertyDescriptor HBASE_CACHE_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Cache Service")
.description("HBaseCacheService")
.identifiesControllerService(DistributedMapCacheClient.class)
.required(true)
.build();
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { }
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
List<PropertyDescriptor> propDescs = new ArrayList<>();
propDescs.add(HBASE_CLIENT_SERVICE);
propDescs.add(HBASE_CACHE_SERVICE);
return propDescs;
}
}

View File

@ -1,113 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.hbase;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.lookup.LookupFailureException;
import org.apache.nifi.lookup.LookupService;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.serialization.record.Record;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
public class TestRecordLookupProcessor extends AbstractProcessor {
static final PropertyDescriptor HBASE_LOOKUP_SERVICE = new PropertyDescriptor.Builder()
.name("HBase Lookup Service")
.description("HBaseLookupService")
.identifiesControllerService(LookupService.class)
.required(true)
.build();
static final PropertyDescriptor HBASE_ROW = new PropertyDescriptor.Builder()
.name("HBase Row Id")
.description("The Row Id to Lookup.")
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("All successful FlowFiles are routed to this relationship")
.build();
static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("All failed FlowFiles are routed to this relationship")
.build();
private List<Record> lookedupRecords = new ArrayList<>();
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
List<PropertyDescriptor> propDescs = new ArrayList<>();
propDescs.add(HBASE_LOOKUP_SERVICE);
propDescs.add(HBASE_ROW);
return propDescs;
}
@Override
public Set<Relationship> getRelationships() {
Set<Relationship> relationships = new HashSet<>();
relationships.add(REL_SUCCESS);
relationships.add(REL_FAILURE);
return relationships;
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
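// Build the lookup coordinates from the configured row id and delegate to the configured lookup service.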
final String rowKey = context.getProperty(HBASE_ROW).getValue();
final Map<String, Object> coordinates = new HashMap<>();
coordinates.put(HBase_2_RecordLookupService.ROW_KEY_KEY, rowKey);
final LookupService<Record> lookupService = context.getProperty(HBASE_LOOKUP_SERVICE).asControllerService(LookupService.class);
try {
final Optional<Record> record = lookupService.lookup(coordinates);
if (record.isPresent()) {
lookedupRecords.add(record.get());
session.transfer(flowFile, REL_SUCCESS);
} else {
session.transfer(flowFile, REL_FAILURE);
}
} catch (LookupFailureException e) {
session.transfer(flowFile, REL_FAILURE);
}
}
public List<Record> getLookedupRecords() {
return new ArrayList<>(lookedupRecords);
}
}

View File

@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hbase</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>kerberos</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hbase</value>
</property>
</configuration>

View File

@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hbase</value>
</property>
<property>
<name>hbase.security.authentication</name>
<value>kerberos</value>
</property>
<property>
<name>hbase.security.authorization</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hbase</value>
</property>
</configuration>

View File

@ -1,57 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-shared-bom</artifactId>
<version>2.0.0-SNAPSHOT</version>
<relativePath>../../nifi-standard-shared-bundle/nifi-standard-shared-bom</relativePath>
</parent>
<artifactId>nifi-hbase_2-client-service-bundle</artifactId>
<packaging>pom</packaging>
<properties>
<hbase.version>2.6.0-hadoop3</hbase.version>
</properties>
<modules>
<module>nifi-hbase_2-client-service</module>
<module>nifi-hbase_2-client-service-nar</module>
</modules>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes combine.children="append">
<exclude>src/test/resources/fake.keytab</exclude>
<exclude>src/test/resources/krb5.conf</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<dependencyManagement>
<dependencies>
<!-- Override Guava 27 -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>33.3.1-jre</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>

View File

@ -64,11 +64,6 @@
<artifactId>nifi-dbcp-service-api</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-hbase-client-service-api</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-oauth2-provider-api</artifactId>

View File

@ -38,8 +38,6 @@
<module>nifi-db-schema-registry-bundle</module>
<module>nifi-dbcp-service-api</module>
<module>nifi-dbcp-service-bundle</module>
<module>nifi-hbase-client-service-api</module>
<module>nifi-hbase_2-client-service-bundle</module>
<module>nifi-schema-registry-service-api</module>
<module>nifi-record-serialization-service-api</module>
<module>nifi-record-serialization-services-bundle</module>

View File

@ -38,7 +38,6 @@
<module>nifi-enrich-bundle</module>
<module>nifi-hl7-bundle</module>
<module>nifi-mongodb-bundle</module>
<module>nifi-hbase-bundle</module>
<module>nifi-asana-bundle</module>
<module>nifi-media-bundle</module>
<module>nifi-avro-bundle</module>