mirror of https://github.com/apache/nifi.git
NIFI-2613 Apache POI processor for Excel to CSV
Signed-off-by: James Wing <jvwing@gmail.com> This closes #929.
This commit is contained in:
parent
47c6718fe2
commit
d05727b8c0
|
@ -541,6 +541,11 @@ The following binary components are provided under the Apache Software License v
|
||||||
Apache Kafka
|
Apache Kafka
|
||||||
Copyright 2012 The Apache Software Foundation.
|
Copyright 2012 The Apache Software Foundation.
|
||||||
|
|
||||||
|
(ASLv2) Apache POI
|
||||||
|
The following NOTICE information applies:
|
||||||
|
Apache POI
|
||||||
|
Copyright 2012 The Apache Software Foundation.
|
||||||
|
|
||||||
(ASLv2) Yammer Metrics
|
(ASLv2) Yammer Metrics
|
||||||
The following NOTICE information applies:
|
The following NOTICE information applies:
|
||||||
Metrics
|
Metrics
|
||||||
|
|
|
@ -208,6 +208,11 @@ language governing permissions and limitations under the License. -->
|
||||||
<artifactId>nifi-html-nar</artifactId>
|
<artifactId>nifi-html-nar</artifactId>
|
||||||
<type>nar</type>
|
<type>nar</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-poi-nar</artifactId>
|
||||||
|
<type>nar</type>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.nifi</groupId>
|
<groupId>org.apache.nifi</groupId>
|
||||||
<artifactId>nifi-kite-nar</artifactId>
|
<artifactId>nifi-kite-nar</artifactId>
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-poi-bundle</artifactId>
|
||||||
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>nifi-poi-nar</artifactId>
|
||||||
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
|
<packaging>nar</packaging>
|
||||||
|
<properties>
|
||||||
|
<maven.javadoc.skip>true</maven.javadoc.skip>
|
||||||
|
<source.skip>true</source.skip>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-poi-processors</artifactId>
|
||||||
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,209 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
APACHE NIFI SUBCOMPONENTS:
|
||||||
|
|
||||||
|
The Apache NiFi project contains subcomponents with separate copyright
|
||||||
|
notices and license terms. Your use of the source code for these
|
||||||
|
subcomponents is subject to the terms and conditions of the following
|
||||||
|
licenses.
|
|
@ -0,0 +1,35 @@
|
||||||
|
nifi-poi-nar
|
||||||
|
Copyright 2017 The Apache Software Foundation
|
||||||
|
|
||||||
|
This product includes software developed at
|
||||||
|
The Apache Software Foundation (http://www.apache.org/).
|
||||||
|
|
||||||
|
===========================================
|
||||||
|
Apache Software License v2
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
The following binary components are provided under the Apache Software License v2
|
||||||
|
|
||||||
|
(ASLv2) Apache POI
|
||||||
|
The following NOTICE information applies:
|
||||||
|
|
||||||
|
This product contains parts that were originally based on software from BEA.
|
||||||
|
Copyright (c) 2000-2003, BEA Systems, <http://www.bea.com/>.
|
||||||
|
|
||||||
|
This product contains W3C XML Schema documents. Copyright 2001-2003 (c)
|
||||||
|
World Wide Web Consortium (Massachusetts Institute of Technology, European
|
||||||
|
Research Consortium for Informatics and Mathematics, Keio University)
|
||||||
|
|
||||||
|
This product contains the Piccolo XML Parser for Java
|
||||||
|
(http://piccolo.sourceforge.net/). Copyright 2002 Yuval Oren.
|
||||||
|
|
||||||
|
This product contains the chunks_parse_cmds.tbl file from the vsdump program.
|
||||||
|
Copyright (C) 2006-2007 Valek Filippov (frob@df.ru)
|
||||||
|
|
||||||
|
This product contains parts of the eID Applet project
|
||||||
|
(http://eid-applet.googlecode.com). Copyright (c) 2009-2014
|
||||||
|
FedICT (federal ICT department of Belgium), e-Contract.be BVBA (https://www.e-contract.be),
|
||||||
|
Bart Hanssens from FedICT
|
||||||
|
|
||||||
|
CurvesAPI is BSD-licensed software (https://github.com/virtuald/curvesapi/)
|
||||||
|
Copyright (c) 2005, Graph Builder
|
|
@ -0,0 +1,82 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<poi.version>3.14</poi.version>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-poi-bundle</artifactId>
|
||||||
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>nifi-poi-processors</artifactId>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<!--
  Xerces provides the SAX parser implementation (org.apache.xerces.parsers.SAXParser).
  Only xercesImpl is declared: the legacy xerces:xerces artifact packages the same
  org.apache.xerces classes at an older version, so declaring both would place two
  conflicting copies on the classpath.
-->
<dependency>
    <groupId>xerces</groupId>
    <artifactId>xercesImpl</artifactId>
    <version>2.11.0</version>
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.poi</groupId>
|
||||||
|
<artifactId>poi</artifactId>
|
||||||
|
<version>${poi.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.poi</groupId>
|
||||||
|
<artifactId>poi-ooxml</artifactId>
|
||||||
|
<version>${poi.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-processor-utils</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-mock</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-simple</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<version>4.11</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
|
@ -0,0 +1,418 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.poi;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FilenameUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||||
|
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||||
|
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||||
|
import org.apache.nifi.annotation.documentation.Tags;
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
|
import org.apache.nifi.flowfile.FlowFile;
|
||||||
|
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||||
|
import org.apache.nifi.processor.AbstractProcessor;
|
||||||
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
|
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||||
|
import org.apache.nifi.processor.Relationship;
|
||||||
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||||
|
import org.apache.nifi.processor.io.OutputStreamCallback;
|
||||||
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||||
|
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||||
|
import org.apache.poi.xssf.model.SharedStringsTable;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.InputSource;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.XMLReader;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
import org.xml.sax.helpers.XMLReaderFactory;
|
||||||
|
|
||||||
|
|
||||||
|
@Tags({"excel", "csv", "poi"})
|
||||||
|
@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel " +
|
||||||
|
"document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file " +
|
||||||
|
"where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx " +
|
||||||
|
"(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted " +
|
||||||
|
"CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
|
||||||
|
@WritesAttributes({@WritesAttribute(attribute="sheetname", description="The name of the Excel sheet that this particular row of data came from in the Excel document"),
|
||||||
|
@WritesAttribute(attribute="numrows", description="The number of rows in this Excel Sheet"),
|
||||||
|
@WritesAttribute(attribute="sourcefilename", description="The name of the Excel document file that this data originated from"),
|
||||||
|
@WritesAttribute(attribute="convertexceltocsvprocessor.error", description="Error message that was encountered on a per Excel sheet basis. This attribute is" +
|
||||||
|
" only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end" +
|
||||||
|
" user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
|
||||||
|
public class ConvertExcelToCSVProcessor
|
||||||
|
extends AbstractProcessor {
|
||||||
|
|
||||||
|
private static final String CSV_MIME_TYPE = "text/csv";
|
||||||
|
public static final String SHEET_NAME = "sheetname";
|
||||||
|
public static final String ROW_NUM = "numrows";
|
||||||
|
public static final String SOURCE_FILE_NAME = "sourcefilename";
|
||||||
|
private static final String SAX_CELL_REF = "c";
|
||||||
|
private static final String SAX_CELL_TYPE = "t";
|
||||||
|
private static final String SAX_CELL_STRING = "s";
|
||||||
|
private static final String SAX_CELL_CONTENT_REF = "v";
|
||||||
|
private static final String SAX_ROW_REF = "row";
|
||||||
|
private static final String SAX_SHEET_NAME_REF = "sheetPr";
|
||||||
|
private static final String DESIRED_SHEETS_DELIMITER = ",";
|
||||||
|
private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
|
||||||
|
private static final String SAX_PARSER = "org.apache.xerces.parsers.SAXParser";
|
||||||
|
|
||||||
|
public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor
|
||||||
|
.Builder().name("extract-sheets")
|
||||||
|
.displayName("Sheets to Extract")
|
||||||
|
.description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property" +
|
||||||
|
" is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not " +
|
||||||
|
"specified in this value will be ignored.")
|
||||||
|
.required(false)
|
||||||
|
.expressionLanguageSupported(true)
|
||||||
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final Relationship ORIGINAL = new Relationship.Builder()
|
||||||
|
.name("original")
|
||||||
|
.description("Original Excel document received by this processor")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final Relationship SUCCESS = new Relationship.Builder()
|
||||||
|
.name("success")
|
||||||
|
.description("Excel data converted to csv")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
public static final Relationship FAILURE = new Relationship.Builder()
|
||||||
|
.name("failure")
|
||||||
|
.description("Failed to parse the Excel document")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
private List<PropertyDescriptor> descriptors;
|
||||||
|
|
||||||
|
private Set<Relationship> relationships;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void init(final ProcessorInitializationContext context) {
|
||||||
|
final List<PropertyDescriptor> descriptors = new ArrayList<>();
|
||||||
|
descriptors.add(DESIRED_SHEETS);
|
||||||
|
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||||
|
|
||||||
|
final Set<Relationship> relationships = new HashSet<>();
|
||||||
|
relationships.add(ORIGINAL);
|
||||||
|
relationships.add(SUCCESS);
|
||||||
|
relationships.add(FAILURE);
|
||||||
|
this.relationships = Collections.unmodifiableSet(relationships);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Relationship> getRelationships() {
|
||||||
|
return this.relationships;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||||
|
return descriptors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||||
|
final FlowFile flowFile = session.get();
|
||||||
|
if ( flowFile == null ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
session.read(flowFile, new InputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(InputStream inputStream) throws IOException {
|
||||||
|
|
||||||
|
try {
|
||||||
|
String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS)
|
||||||
|
.evaluateAttributeExpressions().getValue();
|
||||||
|
|
||||||
|
OPCPackage pkg = OPCPackage.open(inputStream);
|
||||||
|
XSSFReader r = new XSSFReader(pkg);
|
||||||
|
SharedStringsTable sst = r.getSharedStringsTable();
|
||||||
|
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
|
||||||
|
|
||||||
|
if (desiredSheetsDelimited != null) {
|
||||||
|
|
||||||
|
String[] desiredSheets = StringUtils
|
||||||
|
.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
|
||||||
|
|
||||||
|
if (desiredSheets != null) {
|
||||||
|
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
InputStream sheet = iter.next();
|
||||||
|
String sheetName = iter.getSheetName();
|
||||||
|
|
||||||
|
for (int i = 0; i < desiredSheets.length; i++) {
|
||||||
|
//If the sheetName is a desired one parse it
|
||||||
|
if (sheetName.equalsIgnoreCase(desiredSheets[i])) {
|
||||||
|
handleExcelSheet(session, flowFile, sst, sheet, sheetName);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
//Get all of the sheets in the document.
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
handleExcelSheet(session, flowFile, sst, iter.next(), iter.getSheetName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (InvalidFormatException ife) {
|
||||||
|
getLogger().error("Only .xlsx Excel 2007 OOXML files are supported", ife);
|
||||||
|
throw new UnsupportedOperationException("Only .xlsx Excel 2007 OOXML files are supported", ife);
|
||||||
|
} catch (OpenXML4JException e) {
|
||||||
|
getLogger().error("Error occurred while processing Excel document metadata", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
session.transfer(flowFile, ORIGINAL);
|
||||||
|
|
||||||
|
} catch (RuntimeException ex) {
|
||||||
|
getLogger().error("Failed to process incoming Excel document", ex);
|
||||||
|
FlowFile failedFlowFile = session.putAttribute(flowFile,
|
||||||
|
ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
|
||||||
|
session.transfer(failedFlowFile, FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
|
||||||
|
*
|
||||||
|
* @param session
|
||||||
|
* The NiFi ProcessSession instance for the current invocation.
|
||||||
|
*/
|
||||||
|
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF,
|
||||||
|
SharedStringsTable sst, final InputStream sheetInputStream, String sName) throws IOException {
|
||||||
|
|
||||||
|
FlowFile ff = session.create();
|
||||||
|
try {
|
||||||
|
|
||||||
|
XMLReader parser =
|
||||||
|
XMLReaderFactory.createXMLReader(
|
||||||
|
SAX_PARSER
|
||||||
|
);
|
||||||
|
ExcelSheetRowHandler handler = new ExcelSheetRowHandler(sst);
|
||||||
|
parser.setContentHandler(handler);
|
||||||
|
|
||||||
|
ff = session.write(ff, new OutputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(OutputStream out) throws IOException {
|
||||||
|
InputSource sheetSource = new InputSource(sheetInputStream);
|
||||||
|
ExcelSheetRowHandler eh = null;
|
||||||
|
try {
|
||||||
|
eh = (ExcelSheetRowHandler) parser.getContentHandler();
|
||||||
|
eh.setFlowFileOutputStream(out);
|
||||||
|
parser.setContentHandler(eh);
|
||||||
|
parser.parse(sheetSource);
|
||||||
|
sheetInputStream.close();
|
||||||
|
} catch (SAXException se) {
|
||||||
|
getLogger().error("Error occurred while processing Excel sheet {}", new Object[]{eh.getSheetName()}, se);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (handler.getSheetName().equals(UNKNOWN_SHEET_NAME)) {
|
||||||
|
//Used the named parsed from the handler. This logic is only here because IF the handler does find a value that should take precedence.
|
||||||
|
ff = session.putAttribute(ff, SHEET_NAME, sName);
|
||||||
|
} else {
|
||||||
|
ff = session.putAttribute(ff, SHEET_NAME, handler.getSheetName());
|
||||||
|
sName = handler.getSheetName();
|
||||||
|
}
|
||||||
|
|
||||||
|
ff = session.putAttribute(ff, ROW_NUM, new Long(handler.getRowCount()).toString());
|
||||||
|
|
||||||
|
if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
|
||||||
|
ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
|
||||||
|
} else {
|
||||||
|
ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
|
||||||
|
ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
|
||||||
|
ff.getAttribute(CoreAttributes.FILENAME.key()), sName));
|
||||||
|
ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
|
||||||
|
|
||||||
|
session.transfer(ff, SUCCESS);
|
||||||
|
|
||||||
|
} catch (SAXException saxE) {
|
||||||
|
getLogger().error("Failed to create instance of SAXParser {}", new Object[]{SAX_PARSER}, saxE);
|
||||||
|
ff = session.putAttribute(ff,
|
||||||
|
ConvertExcelToCSVProcessor.class.getName() + ".error", saxE.getMessage());
|
||||||
|
session.transfer(ff, FAILURE);
|
||||||
|
} finally {
|
||||||
|
sheetInputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts every row from an Excel Sheet and generates a corresponding JSONObject whose key is the Excel CellAddress and value
|
||||||
|
* is the content of that CellAddress converted to a String
|
||||||
|
*/
|
||||||
|
private class ExcelSheetRowHandler
|
||||||
|
extends DefaultHandler {
|
||||||
|
|
||||||
|
private SharedStringsTable sst;
|
||||||
|
private String currentContent;
|
||||||
|
private boolean nextIsString;
|
||||||
|
private OutputStream outputStream;
|
||||||
|
private boolean firstColInRow;
|
||||||
|
long rowCount;
|
||||||
|
String sheetName;
|
||||||
|
|
||||||
|
private ExcelSheetRowHandler(SharedStringsTable sst) {
|
||||||
|
this.sst = sst;
|
||||||
|
this.firstColInRow = true;
|
||||||
|
this.rowCount = 0l;
|
||||||
|
this.sheetName = UNKNOWN_SHEET_NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFlowFileOutputStream(OutputStream outputStream) {
|
||||||
|
this.outputStream = outputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startElement(String uri, String localName, String name,
|
||||||
|
Attributes attributes) throws SAXException {
|
||||||
|
|
||||||
|
if (name.equals(SAX_CELL_REF)) {
|
||||||
|
String cellType = attributes.getValue(SAX_CELL_TYPE);
|
||||||
|
if(cellType != null && cellType.equals(SAX_CELL_STRING)) {
|
||||||
|
nextIsString = true;
|
||||||
|
} else {
|
||||||
|
nextIsString = false;
|
||||||
|
}
|
||||||
|
} else if (name.equals(SAX_ROW_REF)) {
|
||||||
|
firstColInRow = true;
|
||||||
|
} else if (name.equals(SAX_SHEET_NAME_REF)) {
|
||||||
|
sheetName = attributes.getValue(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
currentContent = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void endElement(String uri, String localName, String name)
|
||||||
|
throws SAXException {
|
||||||
|
|
||||||
|
if (nextIsString) {
|
||||||
|
int idx = Integer.parseInt(currentContent);
|
||||||
|
currentContent = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
|
||||||
|
nextIsString = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (name.equals(SAX_CELL_CONTENT_REF)) {
|
||||||
|
if (firstColInRow) {
|
||||||
|
firstColInRow = false;
|
||||||
|
try {
|
||||||
|
outputStream.write(currentContent.getBytes());
|
||||||
|
} catch (IOException e) {
|
||||||
|
getLogger().error("IO error encountered while writing content of parsed cell " +
|
||||||
|
"value from sheet {}", new Object[]{getSheetName()}, e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
outputStream.write(("," + currentContent).getBytes());
|
||||||
|
} catch (IOException e) {
|
||||||
|
getLogger().error("IO error encountered while writing content of parsed cell " +
|
||||||
|
"value from sheet {}", new Object[]{getSheetName()}, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (name.equals(SAX_ROW_REF)) {
|
||||||
|
//If this is the first row and the end of the row element has been encountered then that means no columns were present.
|
||||||
|
if (!firstColInRow) {
|
||||||
|
try {
|
||||||
|
rowCount++;
|
||||||
|
outputStream.write("\n".getBytes());
|
||||||
|
} catch (IOException e) {
|
||||||
|
getLogger().error("IO error encountered while writing new line indicator", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void characters(char[] ch, int start, int length)
|
||||||
|
throws SAXException {
|
||||||
|
currentContent += new String(ch, start, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getRowCount() {
|
||||||
|
return rowCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSheetName() {
|
||||||
|
return sheetName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the original input filename and updates it by removing the file extension and replacing it with
|
||||||
|
* the .csv extension.
|
||||||
|
*
|
||||||
|
* @param origFileName
|
||||||
|
* Original filename from the input file.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* The new filename with the .csv extension that should be place in the output flowfile's attributes
|
||||||
|
*/
|
||||||
|
private String updateFilenameToCSVExtension(String nifiUUID, String origFileName, String sheetName) {
|
||||||
|
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
|
||||||
|
if (StringUtils.isNotEmpty(origFileName)) {
|
||||||
|
String ext = FilenameUtils.getExtension(origFileName);
|
||||||
|
if (StringUtils.isNotEmpty(ext)) {
|
||||||
|
stringBuilder.append(StringUtils.replace(origFileName, ("." + ext), ""));
|
||||||
|
} else {
|
||||||
|
stringBuilder.append(origFileName);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
stringBuilder.append(nifiUUID);
|
||||||
|
}
|
||||||
|
|
||||||
|
stringBuilder.append("_");
|
||||||
|
stringBuilder.append(sheetName);
|
||||||
|
stringBuilder.append(".");
|
||||||
|
stringBuilder.append("csv");
|
||||||
|
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor
|
|
@ -0,0 +1,170 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.poi;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||||
|
import org.apache.nifi.util.LogMessage;
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
|
import org.apache.nifi.util.TestRunner;
|
||||||
|
import org.apache.nifi.util.TestRunners;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class ConvertExcelToCSVProcessorTest {
|
||||||
|
|
||||||
|
private TestRunner testRunner;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void init() {
|
||||||
|
testRunner = TestRunners.newTestRunner(ConvertExcelToCSVProcessor.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleSheetsGeneratesMultipleFlowFiles() throws Exception {
|
||||||
|
|
||||||
|
testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||||
|
|
||||||
|
MockFlowFile ffSheetA = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||||
|
Long rowsSheetA = new Long(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(rowsSheetA == 4l);
|
||||||
|
assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetA"));
|
||||||
|
assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME).equals("TwoSheets.xlsx"));
|
||||||
|
|
||||||
|
//Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
|
||||||
|
assertTrue(ffSheetA.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetA.csv"));
|
||||||
|
|
||||||
|
MockFlowFile ffSheetB = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
|
||||||
|
Long rowsSheetB = new Long(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(rowsSheetB == 3l);
|
||||||
|
assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetB"));
|
||||||
|
assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME).equals("TwoSheets.xlsx"));
|
||||||
|
|
||||||
|
//Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
|
||||||
|
assertTrue(ffSheetB.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetB.csv"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that all sheets in the Excel document are exported.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
* Any exception thrown during execution.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testProcessAllSheets() throws Exception {
|
||||||
|
|
||||||
|
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||||
|
|
||||||
|
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||||
|
Long l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(l == 7805l);
|
||||||
|
|
||||||
|
testRunner.clearProvenanceEvents();
|
||||||
|
testRunner.clearTransferState();
|
||||||
|
|
||||||
|
testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||||
|
|
||||||
|
ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||||
|
l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(l == 4l);
|
||||||
|
|
||||||
|
ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
|
||||||
|
l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(l == 3l);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that the manually specified sheet is exported from the Excel document.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
* Any exception thrown during execution.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testProcessASpecificSheetThatDoesExist() throws Exception {
|
||||||
|
|
||||||
|
testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Scorecard");
|
||||||
|
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||||
|
|
||||||
|
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||||
|
Long l = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||||
|
assertTrue(l == 7805l);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for a syntactically valid Excel XSSF document with a manually specified Excel sheet that does not exist.
|
||||||
|
* In this scenario only the Original relationship should be invoked.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
* Any exception thrown during execution.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testNonExistantSpecifiedSheetName() throws Exception {
|
||||||
|
|
||||||
|
testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "NopeIDoNotExist");
|
||||||
|
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0); //We aren't expecting any output to success here because the sheet doesn't exist
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for graceful handling and error messaging of unsupported .XLS files.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testHandleUnsupportedXlsFile() throws Exception {
|
||||||
|
|
||||||
|
testRunner.enqueue(new File("src/test/resources/Unsupported.xls").toPath());
|
||||||
|
testRunner.run();
|
||||||
|
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 0);
|
||||||
|
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1);
|
||||||
|
|
||||||
|
List<LogMessage> errorMessages = testRunner.getLogger().getErrorMessages();
|
||||||
|
Assert.assertEquals(2, errorMessages.size());
|
||||||
|
String messageText = errorMessages.get(0).getMsg();
|
||||||
|
Assert.assertTrue(messageText.contains("Excel") && messageText.contains("supported"));
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,35 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Parent POM: the NAR bundles aggregator. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-nar-bundles</artifactId>
        <version>1.2.0-SNAPSHOT</version>
    </parent>

    <!-- Coordinates of this aggregator; it is packaged as a plain POM. -->
    <groupId>org.apache.nifi</groupId>
    <artifactId>nifi-poi-bundle</artifactId>
    <version>1.2.0-SNAPSHOT</version>
    <packaging>pom</packaging>

    <!-- Child modules of the bundle. -->
    <modules>
        <module>nifi-poi-processors</module>
        <module>nifi-poi-nar</module>
    </modules>

</project>
|
|
@ -77,6 +77,7 @@
|
||||||
<module>nifi-gcp-bundle</module>
|
<module>nifi-gcp-bundle</module>
|
||||||
<module>nifi-registry-bundle</module>
|
<module>nifi-registry-bundle</module>
|
||||||
<module>nifi-stateful-analysis-bundle</module>
|
<module>nifi-stateful-analysis-bundle</module>
|
||||||
|
<module>nifi-poi-bundle</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
|
|
6
pom.xml
6
pom.xml
|
@ -1000,6 +1000,12 @@ language governing permissions and limitations under the License. -->
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
<type>nar</type>
|
<type>nar</type>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-poi-nar</artifactId>
|
||||||
|
<version>1.2.0-SNAPSHOT</version>
|
||||||
|
<type>nar</type>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.nifi</groupId>
|
<groupId>org.apache.nifi</groupId>
|
||||||
<artifactId>nifi-kite-nar</artifactId>
|
<artifactId>nifi-kite-nar</artifactId>
|
||||||
|
|
Loading…
Reference in New Issue