From 20c815dc12b6db6c98af7364a379523270a1023b Mon Sep 17 00:00:00 2001 From: dan-s1 Date: Fri, 14 Jun 2024 18:55:49 +0000 Subject: [PATCH] NIFI-13304 Added SplitExcel Processor This closes #8981 Signed-off-by: David Handermann --- .../nifi-poi-bundle/nifi-poi-nar/pom.xml | 3 - .../nifi-poi-bundle/nifi-poi-services/pom.xml | 8 +- .../org/apache/nifi/excel/ExcelReader.java | 29 --- .../nifi/processors/excel/SplitExcel.java | 221 ++++++++++++++++++ .../org.apache.nifi.processor.Processor | 16 ++ .../nifi/processors/excel/TestSplitExcel.java | 148 ++++++++++++ .../excel/dataWithSharedFormula.xlsx | Bin 0 -> 16865 bytes .../resources/excel/sheetsWithEmptySheet.xlsx | Bin 0 -> 7445 bytes .../nifi-poi-bundle/nifi-poi-utils/pom.xml | 26 +++ .../org/apache/nifi/excel/ProtectionType.java | 47 ++++ .../nifi-poi-bundle/pom.xml | 1 + 11 files changed, 464 insertions(+), 35 deletions(-) create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml create mode 100644 nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml index 075dc77336..9db159a85f 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml @@ -21,11 +21,8 @@ nifi-poi-bundle 2.0.0-SNAPSHOT - nifi-poi-nar nar - - org.apache.nifi diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml index aa640c0e26..5fe26e6863 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/pom.xml @@ -30,10 +30,12 @@ src/test/resources/excel/collegeScorecard.xlsx src/test/resources/excel/dataformatting.xlsx + src/test/resources/excel/dataWithSharedFormula.xlsx src/test/resources/excel/dates.xlsx src/test/resources/excel/notExcel.txt src/test/resources/excel/numbers.xlsx src/test/resources/excel/olderFormat.xls + src/test/resources/excel/sheetsWithEmptySheet.xlsx src/test/resources/excel/simpleDataFormatting.xlsx src/test/resources/excel/twoSheets.xlsx @@ -63,7 +65,6 @@ org.apache.logging.log4j log4j-to-slf4j - org.apache.nifi nifi-record @@ -82,8 +83,9 @@ 2.0.0-SNAPSHOT - org.apache.commons - commons-lang3 + org.apache.nifi + nifi-poi-utils + 2.0.0-SNAPSHOT \ No newline at end of file diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java index 24d47f3a75..09c7f647f1 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java @@ -21,7 +21,6 @@ import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnEnabled; import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.components.DescribedValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.context.PropertyContext; import org.apache.nifi.controller.ConfigurationContext; @@ -62,34 +61,6 @@ import java.util.Map; + "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.") public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory { - public enum ProtectionType implements DescribedValue { - UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"), - PASSWORD("Password Protected", "An Excel spreadsheet protected by a password"); - - ProtectionType(String displayName, String description) { - this.displayName = displayName; - this.description = description; - } - - private final String displayName; - private final String description; - - @Override - public String getValue() { - return name(); - } - - @Override - public String getDisplayName() { - return displayName; - } - - @Override - public String getDescription() { - return description; - } - } - public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor .Builder().name("Required Sheets") .displayName("Required Sheets") diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java new file mode 100644 index 0000000000..86812f2e4f --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.excel; + +import com.github.pjfanning.xlsx.StreamingReader; +import com.github.pjfanning.xlsx.exceptions.ExcelRuntimeException; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.SideEffectFree; +import org.apache.nifi.annotation.behavior.SupportsBatching; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.excel.ProtectionType; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.poi.ss.usermodel.CellCopyPolicy; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; + +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME; + +@SideEffectFree +@SupportsBatching +@Tags({"split", "text"}) +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) +@CapabilityDescription("Splits a multi sheet Microsoft Excel spreadsheet into multiple Microsoft Excel spreadsheets where each sheet from the original" + + " file is converted to an individual spreadsheet in its own flow file. This processor is currently only capable of processing .xlsx " + + "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.") +@WritesAttributes({ + @WritesAttribute(attribute = "fragment.identifier", description = "All split Excel FlowFiles produced from the same parent Excel FlowFile will have the same randomly generated UUID added" + + " for this attribute"), + @WritesAttribute(attribute = "fragment.index", description = "A one-up number that indicates the ordering of the split Excel FlowFiles that were created from a single parent Excel FlowFile"), + @WritesAttribute(attribute = "fragment.count", description = "The number of split Excel FlowFiles generated from the parent Excel FlowFile"), + @WritesAttribute(attribute = "segment.original.filename", description = "The filename of the parent Excel FlowFile"), + @WritesAttribute(attribute = SplitExcel.SHEET_NAME, description = "The name of the Excel sheet from the original spreadsheet."), + @WritesAttribute(attribute = SplitExcel.TOTAL_ROWS, description = "The number of rows in the Excel sheet from the original spreadsheet.")}) +public class SplitExcel extends AbstractProcessor { + public static final String SHEET_NAME = "sheetname"; + public static final String TOTAL_ROWS = "total.rows"; + + public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor.Builder() + .name("Protection Type") + .description("Specifies whether an Excel spreadsheet is protected by a password or not.") + .required(true) + .allowableValues(ProtectionType.class) + .defaultValue(ProtectionType.UNPROTECTED) + .build(); + + public static final PropertyDescriptor PASSWORD = new PropertyDescriptor.Builder() + .name("Password") + .description("The password for a password protected Excel spreadsheet") + .required(true) + .sensitive(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD) + .build(); + + public static final Relationship REL_ORIGINAL = new Relationship.Builder() + .name("original") + .description("The original FlowFile that was split into segments. If the FlowFile fails processing, nothing will be sent to this relationship") + .build(); + + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("If a FlowFile cannot be transformed from the configured input format to the configured output format, the unchanged FlowFile will be routed to this relationship.") + .build(); + + public static final Relationship REL_SPLIT = new Relationship.Builder() + .name("split") + .description("The individual Excel 'segments' of the original Excel FlowFile will be routed to this relationship.") + .build(); + + private static final List DESCRIPTORS = List.of(PROTECTION_TYPE, PASSWORD); + private static final Set RELATIONSHIPS = Set.of(REL_ORIGINAL, REL_FAILURE, REL_SPLIT); + private static final CellCopyPolicy CELL_COPY_POLICY = new CellCopyPolicy.Builder() + .cellFormula(CellCopyPolicy.DEFAULT_COPY_CELL_FORMULA_POLICY) + .cellStyle(CellCopyPolicy.DEFAULT_COPY_CELL_STYLE_POLICY) + .cellValue(CellCopyPolicy.DEFAULT_COPY_CELL_VALUE_POLICY) + .condenseRows(CellCopyPolicy.DEFAULT_CONDENSE_ROWS_POLICY) + .copyHyperlink(CellCopyPolicy.DEFAULT_COPY_HYPERLINK_POLICY) + .mergeHyperlink(CellCopyPolicy.DEFAULT_MERGE_HYPERLINK_POLICY) + .mergedRegions(CellCopyPolicy.DEFAULT_COPY_MERGED_REGIONS_POLICY) + .rowHeight(CellCopyPolicy.DEFAULT_COPY_ROW_HEIGHT_POLICY) + .build(); + + @Override + public Set getRelationships() { + return RELATIONSHIPS; + } + + @Override + public final List getSupportedPropertyDescriptors() { + return DESCRIPTORS; + } + + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + FlowFile originalFlowFile = session.get(); + if (originalFlowFile == null) { + return; + } + + final String password = context.getProperty(PASSWORD).getValue(); + final List workbookSplits = new ArrayList<>(); + + try { + session.read(originalFlowFile, in -> { + + final Workbook originalWorkbook = StreamingReader.builder() + .rowCacheSize(100) + .bufferSize(4096) + .password(password) + .setReadHyperlinks(true) // NOTE: Needed for copying rows. + .setReadSharedFormulas(true) // NOTE: If not set to true, then data with shared formulas fail. + .open(in); + + int index = 0; + for (final Sheet originalSheet : originalWorkbook) { + final String originalSheetName = originalSheet.getSheetName(); + try (XSSFWorkbook newWorkbook = new XSSFWorkbook()) { + XSSFSheet newSheet = newWorkbook.createSheet(originalSheetName); + List originalRows = new ArrayList<>(); + for (Row originalRow : originalSheet) { + originalRows.add(originalRow); + } + + if (!originalRows.isEmpty()) { + newSheet.copyRows(originalRows, originalSheet.getFirstRowNum(), CELL_COPY_POLICY); + } + + FlowFile newFlowFile = session.create(originalFlowFile); + try (final OutputStream out = session.write(newFlowFile)) { + newWorkbook.write(out); + workbookSplits.add(new WorkbookSplit(index, newFlowFile, originalSheetName, originalRows.size())); + } + } + + index++; + } + }); + } catch (ExcelRuntimeException | ProcessException e) { + getLogger().error("Failed to split {}", originalFlowFile, e); + session.remove(workbookSplits.stream() + .map(WorkbookSplit::content) + .toList()); + workbookSplits.clear(); + session.transfer(originalFlowFile, REL_FAILURE); + return; + } + + final String fragmentId = UUID.randomUUID().toString(); + final String originalFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key()); + final int extensionIndex = originalFileName.lastIndexOf("."); + String originalFileNameWithoutExtension = originalFileName; + String originalFileNameExtension = ""; + + if (extensionIndex > -1) { + originalFileNameWithoutExtension = originalFileName.substring(0, extensionIndex); + originalFileNameExtension = originalFileName.substring(extensionIndex); + } + + final Map attributes = new HashMap<>(); + attributes.put(FRAGMENT_COUNT.key(), String.valueOf(workbookSplits.size())); + attributes.put(FRAGMENT_ID.key(), fragmentId); + attributes.put(SEGMENT_ORIGINAL_FILENAME.key(), originalFileName); + + for (WorkbookSplit split : workbookSplits) { + attributes.put(CoreAttributes.FILENAME.key(), String.format("%s-%s%s", originalFileNameWithoutExtension, split.index(), originalFileNameExtension)); + attributes.put(FRAGMENT_INDEX.key(), Integer.toString(split.index())); + attributes.put(SHEET_NAME, split.sheetName()); + attributes.put(TOTAL_ROWS, Integer.toString(split.numRows())); + session.putAllAttributes(split.content(), attributes); + } + + session.transfer(originalFlowFile, REL_ORIGINAL); + final List flowFileSplits = workbookSplits.stream() + .map(WorkbookSplit::content) + .toList(); + + session.transfer(flowFileSplits, REL_SPLIT); + } + + private record WorkbookSplit(int index, FlowFile content, String sheetName, int numRows) { + } +} diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor new file mode 100644 index 0000000000..ab2dc1b73a --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.nifi.processors.excel.SplitExcel diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java new file mode 100644 index 0000000000..42a1e18e1a --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/processors/excel/TestSplitExcel.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.excel; + +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class TestSplitExcel { + private TestRunner runner; + + @BeforeAll + static void setUpBeforeAll() throws Exception { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + workbook.createSheet("User Info"); + workbook.createSheet("Vehicle Info"); + workbook.write(outputStream); + } + } + + @BeforeEach + void setUp() { + runner = TestRunners.newTestRunner(SplitExcel.class); + } + + @Test + void testSingleSheet() throws IOException { + Path singleSheet = Paths.get("src/test/resources/excel/dates.xlsx"); + runner.enqueue(singleSheet); + + runner.run(); + + runner.assertTransferCount(SplitExcel.REL_SPLIT, 1); + runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1); + runner.assertTransferCount(SplitExcel.REL_FAILURE, 0); + } + + @Test + void testMultisheet() throws IOException { + Path multisheet = Paths.get("src/test/resources/excel/twoSheets.xlsx"); + String fileName = multisheet.toFile().getName(); + runner.enqueue(multisheet); + + runner.run(); + + runner.assertTransferCount(SplitExcel.REL_SPLIT, 2); + runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1); + runner.assertTransferCount(SplitExcel.REL_FAILURE, 0); + + List flowFiles = runner.getFlowFilesForRelationship(SplitExcel.REL_SPLIT); + String expectedSheetNamesPrefix = "TestSheet"; + List expectedSheetSuffixes = List.of("A", "B"); + List expectedTotalRows = List.of(4, 3); + + for (int index = 0; index < flowFiles.size(); index++) { + MockFlowFile flowFile = flowFiles.get(index); + assertNotNull(flowFile.getAttribute(FRAGMENT_ID.key())); + assertEquals(Integer.toString(index), flowFile.getAttribute(FRAGMENT_INDEX.key())); + assertEquals(Integer.toString(flowFiles.size()), flowFile.getAttribute(FRAGMENT_COUNT.key())); + assertEquals(fileName, flowFile.getAttribute(SEGMENT_ORIGINAL_FILENAME.key())); + assertEquals(expectedSheetNamesPrefix + expectedSheetSuffixes.get(index), flowFile.getAttribute(SplitExcel.SHEET_NAME)); + assertEquals(expectedTotalRows.get(index).toString(), flowFile.getAttribute(SplitExcel.TOTAL_ROWS)); + } + } + + @Test + void testNonExcel() throws IOException { + Path nonExcel = Paths.get("src/test/resources/excel/notExcel.txt"); + runner.enqueue(nonExcel); + + runner.run(); + + runner.assertTransferCount(SplitExcel.REL_SPLIT, 0); + runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 0); + runner.assertTransferCount(SplitExcel.REL_FAILURE, 1); + } + + @Test + void testWithEmptySheet() throws IOException { + Path sheetsWithEmptySheet = Paths.get("src/test/resources/excel/sheetsWithEmptySheet.xlsx"); + String fileName = sheetsWithEmptySheet.toFile().getName(); + runner.enqueue(sheetsWithEmptySheet); + + runner.run(); + + runner.assertTransferCount(SplitExcel.REL_SPLIT, 3); + runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1); + runner.assertTransferCount(SplitExcel.REL_FAILURE, 0); + + List flowFiles = runner.getFlowFilesForRelationship(SplitExcel.REL_SPLIT); + List expectedSheetSuffixes = List.of("TestSheetA", "TestSheetB", "emptySheet"); + List expectedTotalRows = List.of(4, 3, 0); + + for (int index = 0; index < flowFiles.size(); index++) { + MockFlowFile flowFile = flowFiles.get(index); + assertNotNull(flowFile.getAttribute(FRAGMENT_ID.key())); + assertEquals(Integer.toString(index), flowFile.getAttribute(FRAGMENT_INDEX.key())); + assertEquals(Integer.toString(flowFiles.size()), flowFile.getAttribute(FRAGMENT_COUNT.key())); + assertEquals(fileName, flowFile.getAttribute(SEGMENT_ORIGINAL_FILENAME.key())); + assertEquals(expectedSheetSuffixes.get(index), flowFile.getAttribute(SplitExcel.SHEET_NAME)); + assertEquals(expectedTotalRows.get(index).toString(), flowFile.getAttribute(SplitExcel.TOTAL_ROWS)); + } + } + + @Test + void testDataWithSharedFormula() throws IOException { + Path dataWithSharedFormula = Paths.get("src/test/resources/excel/dataWithSharedFormula.xlsx"); + runner.enqueue(dataWithSharedFormula); + + runner.run(); + + runner.assertTransferCount(SplitExcel.REL_SPLIT, 2); + runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1); + runner.assertTransferCount(SplitExcel.REL_FAILURE, 0); + } +} diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/dataWithSharedFormula.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c399b640cbe0d981839621de3a95c07c529ee87e GIT binary patch literal 16865 zcmeIZbx>Vd(=U9G;O_437Tn$4HMqM6hv4oIT!OnxfZ!4c?(P!YoqIA*-8(axdB3Xf z@AuhN>{G>Fzus&0YFXX8TTuoK91Q>gfC2yj!~ls*vj~3>000jH06+ymf$E6Z+qsz9 zxfrN=I+!}^F?iV85aok|Qsn|bf#d&s{V(P~pQ@Y!C^OpEjC(@%4xFZM-B`UnAU7ps zMq7iIAx!CLA0Yg z>VMb1s>9Gh>M)as5d;s3s2CZXx8;~z#34jreQl;{Bss0TqF&5TBI~?dMJ(^5HG&JOK3iubCCN$ISI-8w&I>k zParx~6B+9>&eX9fn>j_-b4cMmsJ6TcUCdlW#=GolQLi4b)H~+fgxCd?xwkhkfa2d2 zw^5CW^cwg?7O0o7K*cq1GPQALWcYReufqN>*3!Q$y&^$Qp_d6RZceRTW4@ng#2u#AS!}ob)d6hT%Xo&cBi>)FO1&xQa$-Ocp z_0iE4l7_-5MclDsvk%#A_Gb1rT|(NE%B>@oro5#jPiA}t?HySm20qaS2LNyYP#_*QjQ`OScY7ymBYS)6U;gd?>I?|b z$pOdy@4mh!3CZ*^AqAfWzUA%MO{?pPv9AV^9II)Z<4jqiHc3gFsh@Ols5ZG8mZokw zuElW>GNxB>7&OW86XjmW*YJ*Mg33RPw20R;)}dh zTZwDx+4>XMa`=yxz$QdlrH4Z0nK9*F$C>JfD=<1J7t18*qcakeBrF-?m&u70zXN7z zgIi)b12Tu4nK&YQ6xRoC2y^HWZX9_99*{@ufpYOr0V6~kI^+ae;8dW1Ap%DL1?;b) zRhT5Fuq=oaa+dr^*i>~MHna(EDe(?ZBduqM|<15s+V|vmfk1@?T~lW zzlL~n_2VmRdwoH)`Y}jW>Kqy`ehjAwsSWv9*+$pi{!?h?1E2`h6dvOlnNNU5!0&Tq zWxIi<*ErM%@9v*SpLwOemM8@gEzy3MnDILS)Z@F(D{r(*lp=>XIsx$VFy(kCmk|~Fp`e(usfm&2`gr4 zKQ){p+v{#YA6&o2&>N>HMYz-MJ*|{bqU9KWBUm0)SPP|S-VzWuY8xc-#qpO=ML@qOCSroX__FV&PKDrT9&B_Lf!XS+=TlWz8N12f*gIsMIzVv zUeHmL^DpZ)qiJiu%#QTR|KTk_(LWD=PqL1lL!Pd<7T@??Tdp9> z2SOO|22n0i$;pC)zSEkwPSR2i%BBL3cPS5%ViE4%r>CvkNjg3&2}DRkb(upJ_rBEQjS!aAIs0;~DVS6reC58rbz+x_WP7O|_7mZEbhJm*yx#&xdF zZ-3uxad7!AM(4P-R;Z>G#NF7vga4^Sqn)d7lL*-|_!_K5$ZPwWA3gR^wF|TmU{H;N zP-lMt!kTt57L$g`Ltrd!o^Uh_?mRN*)@JKgm%ofA8VDVZHUSv*3zqK9^tP5Kj%mMR zMw&u72rf*^qow=e;f;NQa=gg4NK#}et1dct9{%X#>+#NFJ3=IDn_$m=JAEGFJlyy$ z99I+jJcvb|LR^9smLdd9Qc28&I~Zgn$9@hrR}5yhFJ!*L%kV~bt8pFSGptGAq-H@{ zEIjtEHwO3gbMfrxC8p^pv90Ut44`+YhsE#1w(JlVjVa&D$&AA{$$UZGsfVmr4Wqm} zBxxZnYy_i-eUg4zdqv3^^1L2lW_WygqqZHuasnA7AG1J^ynW)?l;*$(mN=Y9z%!_i zF}C%wn~rlqcQCTyu{Q3Lgz_UZAV@>1vZounaJZ_x)E7K(Cw9)<4bZuQH1e|w-TDko z#H03uwH`>c6Hg$1+2oLDD~$ zcWs>+Y1qrF+{E;J)}z;6kV>eP2`6~AqQ_ZNzT3)xs-zJ7V6=qD;_1g8>z zw;Smgec2{ea-hn^0g8g`C3v>2Xq?7Q`u66ZO{vS8W1Ccs*`a=a*bZ^0cu;M8L-BSy zBQJNGxvBW0d{us{|6-d>u0%K{hskV-{3(9{4v6g@=t80IV+S+l z^=I`0lCe81z3H2#?M_&#Pe-=ME*dWSLxnW%2)PH#LsQXZOPmTh>T0V^KlwqpgR~*r zC%aN?!Kd7dC$M3EICg`b_uELN=a<^Sa<@@?HtAQ|`2|PDo3M2-?u^o3AC5So2Oih( zvT~#r|EzLR8Macb5CSNOeuq1yAcdtpgJ>=nbii5kfx9d-1~u<1FN@}oOPqc$-G)`}3vtRyyHM~-U#8Cufu|m9>9SAy%eZfPPbk-nonYx_Y7?watJ6{4^CaoG z85tgWW5M#aA2u_1@V-?UpEY}}iEVgQS`J?6tP=2BW?gd&S>xQFNLBN+!4(O?dpTga ztvmay``S!z1gVH|fmET_(vYa;1QT^T#3I`vLpMKSaXv%-n+FygfY`i&1pw&90068% zJ+QNdsi})I;~zgPzwBg%?y}uBCyI~3hqvzYZGiDl1};e!EO$I}f4vXP^{M9l@u&s1 zwAQ6OtA1T#>QW?;W{u{Rsq);sG$}DM{oHig&0XF+$CL);D)r^D?$Zt!vXrxqDqY7h z)$Y@NFIN})M%)7f5Q~Mx$TJWT;iLZ*G^nQMB*G zD>IH1m!Ivl>3-X@y<2mcL&%UB#UGj8XFL+)Ib#;01MEVlCEczOTTQ@ZZ( z`BfeD`b_U;601vL>GC1g%D{H%=pBFYVPWO^o4W0?!mKYMro^O6K7agv+iL~G3%nqR z?9_qG4B2kcs!t`-D*#pQ^V^Z<>>1PH*PPksj;jqNzlb+ZIsG!*2h;UeeLr5c_W5QE zU8w+cu^(gK??THKzZHY3jOVLrt&k2L-6ygq|9OH2%>XzxAJZcicf zqJK^x{Dx``2uBHiXiiqNje4PF0{X)j4n$T9-1GXb2LTBKd5jJ7eX*TO%2i0FFFyV` zS;0FBz9EqeaCI$MRD5q-KbSwcxbP?0dir^l2~fn*5XpYNQzp+ohw$!J%%rASZeHEH zs#CA^WM?z>U|=8Vs~&;imvx_pczwL#Z_V-xhWsEB_9YU^rjJt8ACif1DNGsn(jV!5 z;-jP?sro5fUbKe`a(uKVsqm`bzT^;*LRGv+OBNT>ay@M9{OmCeuKP5UXLK)-@yZq_ zZ)!&`*ER++afK9E>*xrW5m9#N^4>8CI7wwF9aei5Ee$xv@wU$Phq5zLILy&URIbv2 z=DXNQ1?vw!<#54;}mG!~ieqm2d_V=|uWc%D8%Wf+K$ff>>HVw(gbC9cOk zTrNE1dq^#kHVg=nal$Vg0O>r;=4|MAmgyI@Sn>DQSoMsK$yd9_UMN_nX-^XdP$NDa z1zJ0Y)*061Fg2%gGStRGrTg@09@mEgNm94*XrMhXd-=|B9z@$YO&JnGhX!LfQP8dT ziWq)DZmiLhX{a+q%Bp)Z*lH1FBeBU(PloK%6s<2d4Mw6JT?~B%&c`Br0nHjuw1~7* zk~cmPWl}2+kzb3q-04(1I~hJ`Ka(+-)9!Lq_z;!EBpccIa`b8rhFDq9z3RZG2Mh^4obfqZLIgqG%VPjz;nlt(HDklASL|%v;J~NZ|)R(De>3JO+dq zCwB@(U9fhA#S-9%?=x1h&t};K^LXTIF2`j-DImW5M4j#`66HtL=&SfZu#XLZNGn5E}eLmPy zO33%1BTFkysMmoUNK%e?`EEYJkaFYMD2r&zND7N~V6nKJBnof4C_8EawUq*P4eO3t z+PEJMRV8dmOH3-i99lwe78#+P)O))W)fRNKaBPP+hKP1RtGg8jnUro33Iw zFxBx=b%aK0!`vGVyY#j+I+=Cv(nbnUdP*^bczKUaeRzD=s-0jjb#r=*62zLymGTltg07v*_eqej*K!WSU>B5q1>x z)N@9EtoPV2>+|@G9T~J*i1$UtvV4rpaVg0zCqaYy=sMVn>^t$P8x5)cmpEc6TNLo} zq^LVAiy0kEw`}Q8m_kYLWD8NT#O$OxHWb*H8Q@5XFqWLP3}ie`%0ePeQ5jZ=cVcsF zkpcsZaT!uJya(*t#wBAz>KRhqvlJkG6~Z__tOl+aa29LpVC)N!#Z@yp1lqa|Owe8W zHOeOXo#?By5wnAxfeVQx#rE{>ZchP6!ls0$9Be6PU#)N|avttiIE$UTlh?A!T5jD0SC|4vSpQ~wb5 zoO#F0L-#V)d(mEK;kXED>#Z{e~X0_}+3-hP?N7ZmRuMzmeeq;u-UdnMwH= z+on|_z(|-F-rSO~ihVc+ef@a|+Hgl~`q+|N!gnapZ}O|4oj9y9?tnhqpkhY+>YOZV zQye`s5sSRlw8_Y#A~6rf_Sac)tXd{EuMBaJP&pHp4MsS|1G~LQqN~ivFd%3tEWQVJ zZH3G~sRx>*$e~B`(-i}_p=j-5yR-V!M2W)MbdBzpPw9-|gT^ItwtYn2-Ch~sal)BI zPAHUdnwTi!Fc)k1KIfs8pw>7yAs+jsN&WSlT5#jp$*Nn-GeztyOm@;bFPRK~OiHDq zZ#ZU@69*-AY>iM4^Ky?O`f*MUC*`N`8+NWHb+PqbeMtj)Y7EPxxA7dMgA`xG^? z6H0EuknY$J^rTUWx{)&BvdqsGH0pu2C*C!*=M7hiUGRLQZL8J&g#twMhg8pSW_$G5 z1ebR#R=Tr{HMf#o6Zb7pv*gcjNYtyV5fdf!9fC9Y(84M#&Rwlo(B& zF{#Fw5Ifdau|+H?Zkd;Occl{nDBIxiv6Co6Bb2(!sN!_JUVaUio68z(N3^{>3SXi? z*TZgD_(Nu7kBu8V2Y^d0%oIkdWg?4eNf*YV{&Kx`z|XRt{YA-MKF_h$HD~t$$#pIv zJt~h4XlqKbu;G${%;_1YmaFm_ifJ*F#Nz53Gj1RjFqFjjO6=QBy);9(31mKau#N^g z1_O8 z177Gn8(e$c4#bAN=>ESlgz5>T5^Fsd5s27;v77 z8W0tqXJ7L>qtVx(9)=2hXw@AYqf#r=G|*09hBNj|$0Q3i7hzzj*eI7wfdC~8Sc#&i z*Ptnv!7lRr#fKY}*JXkjd&fQmm z-OUZ=J41@7`UxuqhJdXg;vqbRy^3M;#tB6Qc8~k2ZLc^}Lmmr{F5|t}R1Smo$sCSG zQ_&^7f2pT#@wBCTnTjWb|2CGJm8+xaT$}amoM;hP9pS!dGwqG^*dWiP#-58^gLdV| zf|-7p_e4*%?4btb6?5#Vd|yGdynPT1tZkc?%L&%}>3m%g);p#&D<3?-q_M{k_a>I| zR_&;LV6dH<@IO_9cvmX-m{Wy?Sh^gNhwy4d8qxROwH|;9f?=z(Nm%K|5yucmEOY*Z zjUZLs$dthG^!|qBkdokdNECI}+_ts+jHMe4uIuwPrv)U5&-wUIw1-(|}~ZI`eTDTnN>VAL249&#|%Xic2(*knFP z<{b!0?#Yd|My_l@o}ihFDVl#SX1E3O1Q16|GH4LrBiyly`aue8c;~gBDQ~qtxsbGo zpelUP@;{&?MLyqk?RoLWt0$%iG4(&^BN(@MO%5_7um`R6P*2 zl2l}u>c9)%#$sluSeZP1ct+0zk1czRU@_X zS>gHzopPL6IO>l&8Ftv-oL%p+GPO~U2*77iG(FL);2KUR;0?tgmQI^ub6Uh0eexI* zs0rexBtEWR@t5IH${CrWh=ULwc3zjFXP`|;4wMz;IvVLITcr&Bbi)Q&byUwMO#P|NFfjoF?$qS@ zVf{PY5oO!ZL$NwPd-}Kz!NJkD6iv5RZ0q>XB9A|mCZMu?ii$o@bz39cIukUe)b6=` z)fKo>()sotb%lX8EIchDZ84SnQSjt~Z$lmYjNisHn5c|#u?NgDu;F?kv}YJ$>BCgM zO}DaoGp4?HH@v6Tq5OO>olm0lUFJYGhWV?LK0{~cBX3Tv46Ij^8||upDY2zOQ3*pV zMRb&7l;`)oPN?mdr;P)8k6k7s>7`U?#0_QSHn^uL(1dmFdD+!_U^?rc-s@pV2x$ur z0AQB<9}``FxG-R%%W0b(#b>?j1u{9?O$9gHU7BIJG`>=%jJ>hcB*AA-GU*GB)V1`r z?yE2E0mVDEPY8*d-xrMW*guDki4y|)UJwmpLc5DEy%#l#Lk&4$LV_+Es@R{VzxSRu z-$kBL_I7_^kk8ms%o+*ZoWEpw-n=;TYk$+t63}-u)k6H%b9l+LGJErL)x%rsY{b|R zTdjH*TDxiXL38GO{p$PuOHc?pjeFpZj6SppgobsFHTHaZU z9bJM5oae6N+v?PTCCAnNm0C_eZ?6|$J$!@izR(v#1HW1E(4g(d)4@~~R|bB(jn1x5 zQnhwAKi@d_VxMkW+ftp4m(U&V7H_7q@kZ;@cI(T?5KP3zZ+ zO(ns>0xC82`B@OzZ(Tv#`u%8%THcca0%sgI2)?l5mIQso-ynZB%jn_1$-ujC>0?a? zpVzlC;fp1*$+Eicet9h;fa@#%KuU~zu2K^p&YsKY19o0t8y`(!`Pn2Gi`xTokIeH`&~5 z7n>QWK?8zw5-Om%Hdq#WGMbhKr|4LUp_X~}4mm`{%tuVV?Dk}v+eOb?VG;S`$~M)u9Dq&#NeHa|$G&D%p^E3`{6v&HPZNKRl42 z+k>#1WDr4-FM|Z5Zip7jStY{IuA*s=xcZJOwT%im_J*bQg^2E$@I;h@^KKu@G(E+m z3!1skvUQVysi&mXH@Ic;d}Kq^u?k+&0g}%hoX(##k;e%h^Do3EQEC}d7Q$pDywSA& zljt(=sq+ZwE*@V{--8=*=C?6wm*R>nqZA!VA{{8qG_U6pE?ltFo#YA`WM}_qoRNt} zJsuQJ+0=c(&!w>k_oKK%IL)w~ErdjvKm?gICJzc3)oVMFgl{_>AJ->5D>I2RmN?kU zsf7l~Yjji7p50Xjtp>y|R8{g7d91HrF9sYO^T2 ziHChyxs@}ZcXO1xBP;Jmv)DL$-NKzjhk7ZD`V3KtQTC&0+f;XyVxskb3`>PyF%411 zQ~sD;y%}z6GM~t1I%FdCPnv>85k~i}oLt1c0tVovR~D2}qCP)k7_{)H3l}`GJ`UUS zq9=}$u}c%vZi!Gxbu}u_+`?U|s7sGS5$(DK4h?4s8o5zMrLF1s+3mSo-H_i8On0Si zs(IYtJT$RKJ5|ZKh9ZhkZv!(ST;O#iIpon?hJ8cL;56ghCf^7jibUVLCMs1$dP*Et z5kM{=x$5%t$hYc}`(r&?h~OCyN1;C)3ynjQN_0|dk=|xA3m2(f3kz;piRYq(Do(4s#I-sqfSuxypx_T;lqvE~PWU%G7O;=kO*^$)ns4 z-_g!?v5^*;)kJes?io3V1DMb457qgp#ocx?&uBFG>F3%CAa(f}#VcPMriH#zY|e1t zV&BpkG}&scv8?&l^ho~X02fXUT~5K%%nFf3FQ$onFOCvMbfrxJICvRmM*`K z;F|JCdX#2VMG5{ILK@O-RFjH*@?w1oye%{CzV0ZBAf1aOjrPh_t6?1}CU;f7t68Xy zpUVuZed^4S+#S9seTBut9w_E45Wo(A9 zp-%j3KCwtb1UZyC8@2&dp1M8Pq6dwm0CfJSgzq>Bd!3j~f+MmFpfrUtOjjO7@(1@p z6y8Ho#BL;q%uzq#1HOq33T|~5{sUk6!jO9H@TYQ3&@ju;hP=jhe?~w8KJCdlx?oQj zP1B78V>GuVq~1mHdzV^GC2=dCKGvbz3Zol-D$;$0hb3AN(Mu`Z2o)%k!3b~4lNC+B zUFotL2?b(K&q3V)*E_t4ev${N<1FE@kLtBcYst@SJ2WW(qa~pWsvlYL*tKnBQF!-z z%5;nj!~PtnJaqj%A@T+bcXY<2Is-+_skPut>HDx7;PD#YpiP4LU=O(rO^)QSe~R+4 ze=Hp|V?GMbJ=5wJZ;FsZtulfpGRtJ3NjcV38h4*V5+|@8OV%<^MAbTvZ3M2D?LCFh zac$FnF5kmFAM*LSWEU+ zk^I^0E)IzIMrkn8QgAc5oQ&h_YkEBxNvQs=Ty7ur#jLgN`s`!GA1@H{J+>jS1K)9t z7+n;TrI{pIUlflba-<2(WFB_EMb7w`qL2?vdfoqcgnA!2)9a4de51NjpEi37x8>OZ zv}$RmNN(t@hk9Gq+(?XXbJp5djTCVloRL_d8Z)-b`Hly&H@&kZa1fFK zNKK&ZAWGvOOQJQHNXp(9WxLYgtBm(Z_}+eeD7#Rharqia;W+N3zT8gn%)8?R6sMU) z@ucr~;KQad?Nr~J0~$_Ca`#c8XAT&d+ilolD6_SmT4KqyZ2LBN_F#a+a}ybo9>nAD z4)9Y_Gctqe`;|37yVrW^*G5ED7#DJ))q^9oc9ENT+KRZ*U1p7g@PsYS5svY% zTS`@~eLJfeXZFs}wCrp!d9dR?kY zb4#7Rfj61+l?(KAKy*HE!M@{%QHp;?_?Yq-pNOy&-)g_o+PWL{@xmz<^Sjhw*Dl@W z$J@uemy-g9gQ&pZPQ{$?Za3^LDO{eFMexXPj>=EZ`z{nkz^0% zSEg+1NMRbQEETuqKf!osQcvUqY#)qonjrFQ;_%XcQ1`s(goYrDl zEiCt*kmh>_LF(G03k{%@db(agDmrD(FV=c9{`6Py`Drd}3rJ=Y27L4-P$kDCOzUq*#Cfs@|&s#hNsVz|l=hL(jCXncM<*|5k) zLD+>7kArJcduKki;ipP?Vki*%_OAs1$=Jo@dce9V6EFY(SRnPQ1kc&R(8<(9#l^|e z&is!eDVxOa^1Vz*Vt10yB0GF?u*$)T2{4UyfaA?x>_p{yNS3Kf4 zyvk7I(*%~ml`P&5h}jN2m5ji z!Ko88Py0#UDR)UY>ASOx7q z1Wxy%SUV*h6qec>2TjGP(vdKAJI4&AQC&9h$fG58qqPE(GJVD(H}&~I`aMeA7+nkq zH-pW)Y_cnL+HFY!O$56GHl}%*sB+uZPRK$w9*^=*LW;=uxRLtjO|xOS zC!fFGCM16oR<%=Pqriit!rd|j`wfPU$@%LKN|p7`%k912ylJu0Y0Gu zGK~OOr~D_={!^~}i*#hyM3Bso z+`Dc)N%@-rJxS@t@4Bj+y27eQ7^lD+@2{KDX-}FZi328t+;niO1%xAXP?6_=o+dFH zZY!*HA5lm5v87pkz0HvVgW)a4;YswR_#^Jq-m<1|bh6+elCDqehic#T_*{>W8F>V{Zsbj>(jNjhy6dT_VBMJE1;cI#X6GhL~c+ zF!8ciJ0^DVod}LI`9RK%0?8P@>B6DHM|kAXe6)H26p3ayq}G$Jc+K*J|L|e*TmKhF zC@dk(7SR>XOMiv25tB#Ei+IBwe8)LnC^`bTbn5MNx4894OsX^f&L};W&D>OGB*s1l zu=ST2)pb3O{{4nc5xJU8D*)K?SHgXHXK9|q*aFLfRukeq1 zR~65imk6>Sr8`eLUjNm}7sfgdb^>Kt6ZjPQud-}x?_~O4VgA!cc_%64o#nW8mmM|FHAjV32(J%=ER?407k^uTE?|vL8;cy~NP%}GJ9q42uNJom}z+7*?i1Y-wvLdEf(s-GCE{fgohIrW5y28591 z=}n~?C>CloTHyF_sN|?n zY;N}Hx<;KkcQxJ_pr@AS3_rHtm7Z;jIm^BpkaYd<$fhTC)Y|dvu2u*`&BamI12558 zkEO@Y96Ge(?RbKnB?NBZKMi<0tcQjJ@JSr-5A`1>+y7sO+B#?kY8p8PYIV{=Mt_ z*Qmc<|I!1lDD!s$fA2p1E4USC68_SQ`aAIN9eIBRt^q4E{^$O@-|76`c=abG0q8&I z{Iw11cku5GHGhH&fd{i+0p$N{ulXJNd%5|a&~D(74G8`9-hULI|4!xiN|ZmT9OL~# z<@dUj-wFKwG4M|UWu*T)jX%WmcPhV!&VN$rVfqgreh;L7r||b^?@v4cpu__BA7j7Y zsr)_s`74zW*1uBuw_xaZTE8En|D+c6{*N{N?MVGQ`tOIdKd}IS6$dcI@Y|8?cl6)g y;$PA1+MFb~wbYv3>d;Qs(Tyq`A! literal 0 HcmV?d00001 diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/test/resources/excel/sheetsWithEmptySheet.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..beafa5f08c7e28ea8b8d311dba319859acb82307 GIT binary patch literal 7445 zcmbVRbySq=x28+Fk?vGdx*KMOkd~J2lpeYyrAr!VX@^#j?iA@xi2+H;3+MdS(c`^8 z|G4{GYrdJa=G`;jv!DIGdp}zR0FQtVgMxwrBbP9)4fBgVx_@uzY-a1i{`BGXeSEu8 z2M2m6)H{OZ{MmvyN^W5@P`;U(hoX(np7g`({LY~7XVQ4M=+QMDF;2dIw=dr<0`1ur zIT!#2uQhaD!?iM)fQvc@Zk;^Yd57|RyK(sCo;ygfIL9h0o(!%?^cQ%$zEMQmJ_Z5C zV7E;6OkqUC!<_Xs+_`Q|!#9I<_M&i|EHv|`{SaD$L3TFJ>=h`A} zG|x3^XRY|y*Ls>rn;{L4iH{!{QpG_|zd23GCSl}%<&b4BIdXWwX&i{o^6ef4FW5if zg!d~>Ca+vv9qb;U`Y-`<2;?9}KD-h|+#%OhQAanz=Mmbg>P+$0ir@)?Se23LoGoaD zZjYHx%uS?R`|T(2ru$7Lx#THwx0@_uU^_A~(pX$$2Q9emD+T0ZYzkB$6meP>BOR|Z zdud2MHzWQM16meDddKOEl*>T>zM5j%%w7vE*V=YZg*=A|P1cg3x4Lj%oHCb_PR>qx zzaK!h{oS1L_-n7;n=@ChPj&@EfIJtEHT)08p6DNIsD^4_5Gy{RseP4N9LES->6B3m zzJ8Zo(b4sdl)q+S4?b*dcF~7=jIRi(j zK2^>MSAJ4@a;wN@r~E8$NVFQk8dUHJr`TwV)KYNq@ScMwAOH4B%G7+{``)TtEZH8B zChCT~l4vgu{ziX#&Haql{q|ixfbTsskx>NdN=PN{(JzDlQv`7S6#)llvj_aa&-Il* zbKrx|wb;Ov`pB!ckhtO%1zbF;$4l1k0jsmm?0#IHyOCxTch-B42#B1Q?W{8m7-4ls zD4K<%)8PBBYEVxKN)2@J09B00)eu=(5lI9CWD>3!HqyWMw?&rl5Hq$REz0ZkzY66Y zwu-3UAbm54*&^xbtU=QER(a^LO}E(0V@EO>6Sns1K!H%34wdtmZCjng#ph7q3^6{L z6e@0FZ7Mp-?UW$LxB+;SgGiBiUZF$7O784iK0)iDBR@K3hwQg2#tXYMVFy`^kW#HK z*o~b=oPG=rWSM0QOWg_+gRofTNap^v@c9r`viLc~#-g$C?v8W(J1;|0h?Cj(CkaM2 z3bB*6u*syknPR4kZ^U4-Q-JT?eRlVo#!NDbx_wR>{j7hnrM7AK z5H0U=?oEgLXf6M7U%|MKmWM67i>s%tnae|@_O-|CGwvg`q^0d^7{CLXLoIGn0(z&F zK1*T&c{{fGYnaf^SxJ5J{hEp`KMU%oNLK8N-0u+CqA;{IW^8$(K_;U5E)5?CQ^_Z) zSl+B#8fWFo^ZOATtzCF8N=l$?L+)e4E=oze$4XE|MGQE^Tw1-yYJT5^Gn4CRE~Tf` z+Y z|4>BK2sUBIY*nJdbYbQdRNM3CpJd;D?TC$P>%r2fE>94N!uWdWmW@qbMt>S(#Wpt; z(d9YkBEQFYJRpw;IJfg*MnC2~P&fW-gg~W~#GTQ$oyl$wKXjL`9W%TmomeVK-pIe{ zwxwi+9z~l#sWAEq;F>Fs2OLHUh6J=cD&f)&=^$99O5}VlFL41TUwP{Ya>4?~jl9I@ zrY>rPHSUTShsaK45pc?Cj_SDBS{Kc`Q-Ku)KGgy)sh0*9k;gPXHY3Og`Deatb=NPN z#E{oNW{A`5ET-T0AF=C2Y1%BuCRjjg(HI|{tP`pUS$k6sugXl|4Qq7;7io_?jbW>8 zP0eQ*g|44QIgVWfmz70OvITP{V@hL#*TJpN!a%XbWqf18q_!|8u2&e#qnp8|gET2o zP~TtAvf=Oze^0k)c*`v}^swsruGyjQATY*!k)!r}Q7~QLKi5pp;K|9GkBJk@IrBiQ z{-O0Z^Ln=y+>3`72bjk}E4D#~GoeE7?ztUvzv2A%a}(=te!Dw3+q`se zuz6s3wAx(EEC)6iswM3krq_UvZQg~Hr(eSN#PxhefuXajns$zDwDfNAwG^aI|NSKl z#G>0`m?V49K`H4lOM|ilP^|p?ARaqY31Nw@NZ#IvgA?-baGosMC@`nY37> zxG&PejzRzK!|C8TNK>*4X!wPSQ;kWVGg7Yg81z11qY$Ag=`f#)_=;dHz7B=^MvSI(G1oY9p zmF=u$16v}=^f9&BAb#y`V!#Z^jkjp?O_YvSyi<(xuC+fcAl)W>TAzZUuLpJ6J7{^y zc{7Nij94?p9fJza7Mj$;v9H9=_~ekNqOC z%JkWs_i;oli(EK8OVhd`&{?2TTst&>7g++Ex52T7GV0ZC*SO@$Ev4?GENjc?Jz-Ew z&}QPbhgeMaktx|}o*Dmk$coowGda3kt7#_PMc@%j7K7N>66UMWPYoLm<=wHTun|Kf zB5rx6;C^v7az@I5SwFcUXiMV-;sdE{jwdvU?@4v>$0P3X-=zAxZ2gs5f0eMU3ICL^ zz3%A{(uWe3szATT!=LF@hP55;3@oJH5F16%s+1onqyt&IyTkM=JvAolM8-W|WM--% zLl9icSbT-H=*I3ITU9~yg%Fv10uKTO%2WS-F`duAb;1IU<~C^oS!Ns!2B zJy1#Zyrr@#g*{CxoaRBZoWQ4Y>+!!EJF}Z3^*P%FOB1nc%g6M<3NTxL<6yZss4am1 zUIX?7T`4PjL&A?@m)#SWKPqw}*S2s11(0i-I0v0EH*ZUstJAAIqZ4VwFa}(&;cJW6 zXXJ{NAAoXZ)@EC|he7O*K;i%Abg?vcHZxUob+)p%`0J2~o@iHsr^fbQX;zkK(1kqX zEHt+IrkR~1+J<71VoCx*pR02)JTVeZ6M&<zI3avYr8JuRI;>)XQKncR(C zyIPCX)C+(Erd56AGi`OjN^UHLBsF|Bd4D`bd`8Q#tdSKr+pqN$)tWRpVWp7@fda=S zeXwvh3lu2-;zx^$-3*})Qx*}KFXP*d?LB4F0q%<_`y!6yN7j7nRt}(7Q$G23D`ohY zG$=MWSBaeLCRK4VW`hI7!hLyMn`Y&$9TI3u(j2`N#Aa;02bgyp6Son~@DI=c{l<%C z?jPLhkI)eR4b204TrADZTwU1zx^X@1P_3FEhdF-iR%j)*N2Qs-upGROOh$SAqk>n} zO7(4vmQJQPx%@BUYcj4c$)8hd+M2|vfoUvcxg8eY#43qhm#f#oo6vtl1Td%(1`Mmp zx9V3xV+Y*%*lJ>I+6k<+t1PPaq|S7k`NFhr_wc0}c_6(6HbGE!8WgxVkTc%tzT zxYs?Ko1Q{yK%vxN$pRH=Id#1=w+q!hPA1dF=L+*4%s{KNNg!64$MMnapFNxLjo{nB z3x$?^p%_6~1@Dr4r$0{cNgBj>!8T9VQySzpyK!}KQzlw={iKb>J}do_oW7!@FW%D{%FxMM7+ZKPnw8b&4o0&o09#r4I5BlDSH}W5Ea9vpPGl*|2vnav zI}m4HVvKaAykaD1NUz;StbVbjhXd$y6`b|M$fnd4$;P@S$!G&?oz@!%U95vdp_9tY zbt^yKH+~WzM6g=|L8a z)tF_xmQ0**DvH$NSXsQ+e%OfoJc3;|-u&b`=~rI9<-F3PJ;~+GY-#U~VyZUvdyBZK ziKOI#ZOA9JhS9!mXm-BAx4_;F&l93gk8iq|>&j?PEF>#pM|<+s)JaPgN-=9Fk_u?) zx{w!UD0sG}C1<}ziocfqnB%6mFs2J@Uchv+ERdEhxT-1s$+0#`T^rFXU8zFo{kJLC zgpLT0`5fBSOMMcA5$Mx|b#pI)Yi)I^rsE0^?EON9lH&IAjYq+xrgAmv@+O%?*_EIV zsA<})hYMikrhUf&o%M+dpg5glB_&{#_1jwp8@gu{6hnw*WBeWTs1veB*E{)yVv0MV ze6)Ft&IKa^eUs#xx2GQrjGxUsel%d>dJ{)#?q~=QO#7ISbsa?C^WxH&AuB4m(`=a) zw}#15d^Q9Q8QAJc-wAE*kQjcBp9gl^7LJ~(v*m_t8jC!Yom(*kol!ny@>I*{_u%_1 zCPDpEiSb`DnfoD=Gd2FnWY=vg8y)-1lMXi={@r~zE3(cV!fDbcF>Ew+Vf4(+eU(ep zr!xg%IRbR6=szwl76(m_cEaNOaLVzDQ>;YO3Q@P=8b`y-E>G=`FZI*;M|do7pRr9K zJMI*Hp^U_v_11#D2!^TCElfJ!luB;QgX@|{z_9F3v?MsLs|y<>c@)(nrD$HebHb(u z+^&E0i=nt{=je`FNQ_PK-WLF&Zi| zkgE(=!SA*CJ>!Hz4@2Sf1$Cd)oE=)XBPtNvDzhaHoX}wHq~2(?^+kWg(^|2TTDR`g z3nK{f(`~N|MvCy#t#8*Aey{4wKu}!X{rb*-{TrMn8B3l!;yWm{$Q)E$awQ;k#Lk9& zCw?JlHh&M6Ke0SnR3&G3)a{6t)1Xk$k{J~j)bJAHc{d+}%=8u3N_HzKK~DMuYbssH z_Z`=|k1;*jvO&pOd$7AqfDDYxD!CS{Tz?@9%q}eiJB&yA6E*U9vSYM7w!EIAm^>DE z=OVK>@vZu>Ue^n0wXYGSl(i`vaHSj(ob&&rb8Oizg*}8Opwxatk85(bc zt1}twUhtDZTpKaAlUK$U{JquK1PECoMHQ5$Zcp>qDV_FLM~f~MXqdlW$)~A*UvZ}n zqWvoC|03Q4H_pEdKUF1+{)gu$&rehV)#{uq5=Zo&>!Ci{tpKGQO5kuK#%KT+AYNB@ zF*sMj#XyBSAf9#7H0P1EQyEq+^RuEsN)}W8=)8fd9Lc!Zj%eR#tX{6T@4E73fvmbh zV|vdc4?#~AC%mqP3^rn|yd+DXWKnd^y_onBX+i53BL9rIcaJV4FM!wFliWaTO{bs| zR#W9hB?&s_^m_eF676c6GhSmg^KMjvp@d)xm5KCJWM3W8>!WrIbGCp!S2GfnE=4K&{I!q0dGQcU7gpeZn0eyrdt6=QJUj| z=)_pAtYS3l^r}y@alw*1FUuPfV;E7NTU-&q>kZ2vBB<;=XW;}m;h&x@-5ljkZXCym zN^TsFTP`eBA$vH|Z1Fj3eCN^g1$?`Gi-d^Eq!`;G!3~A(<_G+!tmwVg^wm2X9*5Dk@+sB3)W4b6v=H9T&@k15Dyo z*%&es$fs>z@~%v}gSkzxflaGhQkC=-wHJ(YJF!+~A@3cfGzDg|()jpWhaMIMePJ!l zwI_#F5<5-=^eZ^Ih0$F_)iT%fFGV1!ZiLjhyVIbh^aa=8y_aqFww$C@q`HHK=I%#R zdrxJ1pCa#d6KcV`nM8IC?3p_Z+4D-ei5T%g!jV(lZvqvBmx<19KR)A$BxBF>JddM-uVOFzY`?8R51EbM&GcCP2%l!4I}*%0^$ZHt zs6Nk<7op1VCE=T^0K~U7E?CEgw<|1c>Dk2zOc^|)JD?;7JP(AVquK6xD@=sNM%?s= z?3>&BD@QH*0IG*7a~&&*))*2cb<8fz!z)@7^)c)L0e*!h;|zCf3c2_dsnr14%+BZ$ ztv2ao64Iu4eYGhU%VI63u1A}D!q=gf{e6MUDq27AVvlZ z)5h+J`P0FFl}FMjv)Yd8DQ&i}N9`zSt0$GhIsZ{JNj~+l%`B!~Gv#{;C;%_wp;v|II1sd#C@$PNV`ryw3<2 P81#qho;AlL51;-A_#?Z1 literal 0 HcmV?d00001 diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml new file mode 100644 index 0000000000..e4120939b2 --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/pom.xml @@ -0,0 +1,26 @@ + + + + 4.0.0 + + org.apache.nifi + nifi-poi-bundle + 2.0.0-SNAPSHOT + + nifi-poi-utils + \ No newline at end of file diff --git a/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java new file mode 100644 index 0000000000..92453cb2bc --- /dev/null +++ b/nifi-extension-bundles/nifi-poi-bundle/nifi-poi-utils/src/main/java/org/apache/nifi/excel/ProtectionType.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.excel; + +import org.apache.nifi.components.DescribedValue; + +public enum ProtectionType implements DescribedValue { + UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"), + PASSWORD("Password Protected", "An Excel spreadsheet protected by a password"); + + ProtectionType(String displayName, String description) { + this.displayName = displayName; + this.description = description; + } + + private final String displayName; + private final String description; + + @Override + public String getValue() { + return name(); + } + + @Override + public String getDisplayName() { + return displayName; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/nifi-extension-bundles/nifi-poi-bundle/pom.xml b/nifi-extension-bundles/nifi-poi-bundle/pom.xml index 93bd06d239..a8f57577e8 100644 --- a/nifi-extension-bundles/nifi-poi-bundle/pom.xml +++ b/nifi-extension-bundles/nifi-poi-bundle/pom.xml @@ -29,6 +29,7 @@ nifi-poi-nar nifi-poi-services + nifi-poi-utils