NIFI-13922 Fixed SplitExcel to use the evaluated formula value for cells (#9466)

Corrected the copy configuration in SplitExcel to ensure the evaluated value of formulas are copied and not the actual formula.

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
dan-s1 2024-10-28 20:59:23 -04:00 committed by GitHub
parent d623d1c7b1
commit aacbd514ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 36 additions and 2 deletions

View File

@ -111,7 +111,7 @@ public class SplitExcel extends AbstractProcessor {
private static final List<PropertyDescriptor> DESCRIPTORS = List.of(PROTECTION_TYPE, PASSWORD);
private static final Set<Relationship> RELATIONSHIPS = Set.of(REL_ORIGINAL, REL_FAILURE, REL_SPLIT);
private static final CellCopyPolicy CELL_COPY_POLICY = new CellCopyPolicy.Builder()
.cellFormula(CellCopyPolicy.DEFAULT_COPY_CELL_FORMULA_POLICY)
.cellFormula(false) // NOTE: setting to false allows for copying the evaluated formula value.
.cellStyle(false) // NOTE: setting to false avoids exceeding the maximum number of cell styles (64000) in a .xlsx Workbook.
.cellValue(CellCopyPolicy.DEFAULT_COPY_CELL_VALUE_POLICY)
.condenseRows(CellCopyPolicy.DEFAULT_CONDENSE_ROWS_POLICY)

View File

@ -19,6 +19,11 @@ package org.apache.nifi.processors.excel;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -26,6 +31,8 @@ import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
@ -33,6 +40,7 @@ import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_IN
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestSplitExcel {
private TestRunner runner;
@ -131,5 +139,31 @@ public class TestSplitExcel {
runner.assertTransferCount(SplitExcel.REL_SPLIT, 2);
runner.assertTransferCount(SplitExcel.REL_ORIGINAL, 1);
runner.assertTransferCount(SplitExcel.REL_FAILURE, 0);
for (MockFlowFile flowFile : runner.getFlowFilesForRelationship(SplitExcel.REL_SPLIT)) {
try (XSSFWorkbook workbook = new XSSFWorkbook(flowFile.getContentStream())) {
Sheet firstSheet = workbook.sheetIterator().next();
// Start from the second row as the first row has column header names
List<Cell> formulaCells = Stream.iterate(firstSheet.getFirstRowNum() + 1, rowIndex -> rowIndex + 1)
.limit(firstSheet.getLastRowNum())
.map(firstSheet::getRow)
.filter(Objects::nonNull)
.map(row -> row.getCell(7)) // NOTE: The argument is 0 based although the formula column when viewed in Excel is in the 8th column.
.filter(Objects::nonNull)
.toList();
for (Cell formulaCell : formulaCells) {
Row row = formulaCell.getRow();
Sheet sheet = row.getSheet();
String messagePrefix = String.format("Cell %s in row %s in sheet %s",
formulaCell.getColumnIndex(), row.getRowNum(), sheet.getSheetName());
// If copy cell formula is set to true the cell types would be FORMULA and the numeric value would be 0.0.
assertEquals(CellType.NUMERIC, formulaCell.getCellType(), String.format("%s did not have the expected NUMERIC cell type", messagePrefix));
assertTrue(formulaCell.getNumericCellValue() > 0.0, String.format("%s did not have expected numeric value greater than 0.0", messagePrefix));
}
}
}
}
}
}