diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml index e61cd4cc10..e082a25af5 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-nar/pom.xml @@ -30,11 +30,6 @@ - - org.apache.nifi - nifi-poi-processors - 2.0.0-SNAPSHOT - org.apache.nifi nifi-poi-services diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml deleted file mode 100644 index 8669c93cc6..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/pom.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - 4.0.0 - - org.apache.nifi - nifi-poi-bundle - 2.0.0-SNAPSHOT - - - nifi-poi-processors - jar - - - - - org.apache.rat - apache-rat-plugin - - - src/test/resources/with-blank-cells.csv - - - - - - - - org.apache.poi - poi - - - org.apache.poi - poi-ooxml - - - com.github.pjfanning - excel-streaming-reader - - - org.apache.logging.log4j - log4j-to-slf4j - - - org.apache.nifi - nifi-api - - - org.apache.nifi - nifi-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-standard-record-utils - 2.0.0-SNAPSHOT - - - org.apache.nifi - nifi-mock - - - diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java deleted file mode 100644 index 362c30b90a..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java +++ /dev/null @@ -1,534 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.processors.poi; - -import com.github.pjfanning.xlsx.StreamingReader; -import com.github.pjfanning.xlsx.exceptions.OpenException; -import com.github.pjfanning.xlsx.exceptions.ParseException; -import com.github.pjfanning.xlsx.exceptions.ReadException; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintStream; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.io.FilenameUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.csv.CSVUtils; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.processor.AbstractProcessor; -import 
org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.apache.poi.ss.usermodel.Cell; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; - - -@Tags({"excel", "csv", "poi"}) -@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel " + - "document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file " + - "where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx " + - "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted " + - "CSV content and will not escape cell's containing invalid content such as newlines or additional commas.") -@WritesAttributes({@WritesAttribute(attribute = "sheetname", description = "The name of the Excel sheet that this particular row of data came from in the Excel document"), - @WritesAttribute(attribute = "numrows", description = "The number of rows in this Excel Sheet"), - @WritesAttribute(attribute = "sourcefilename", description = "The name of the Excel document file that this data originated from"), - @WritesAttribute(attribute = "convertexceltocsvprocessor.error", description = "Error message that was encountered on a per Excel sheet basis. This attribute is" + - " only populated if an error was occured while processing the particular sheet. 
Having the error present at the sheet level will allow for the end" + - " user to better understand what syntax errors in their excel doc on a larger scale caused the error.")}) -public class ConvertExcelToCSVProcessor extends AbstractProcessor { - - private static final String CSV_MIME_TYPE = "text/csv"; - public static final String SHEET_NAME = "sheetname"; - public static final String ROW_NUM = "numrows"; - public static final String SOURCE_FILE_NAME = "sourcefilename"; - private static final String DESIRED_SHEETS_DELIMITER = ","; - private static final String UNKNOWN_SHEET_NAME = "UNKNOWN"; - - public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor - .Builder().name("extract-sheets") - .displayName("Sheets to Extract") - .description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property" + - " is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not " + - "specified in this value will be ignored. A bulletin will be generated if a specified sheet(s) are not found.") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor - .Builder().name("excel-extract-first-row") - .displayName("Number of Rows to Skip") - .description("The row number of the first row to start processing." - + "Use this to skip over rows of data at the top of your worksheet that are not part of the dataset." 
- + "Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.") - .required(true) - .defaultValue("0") - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) - .build(); - - public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor - .Builder().name("excel-extract-column-to-skip") - .displayName("Columns To Skip") - .description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. " - + "Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.") - .required(false) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder() - .name("excel-format-values") - .displayName("Format Cell Values") - .description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.") - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - public static final Relationship ORIGINAL = new Relationship.Builder() - .name("original") - .description("Original Excel document received by this processor") - .build(); - - public static final Relationship SUCCESS = new Relationship.Builder() - .name("success") - .description("Excel data converted to csv") - .build(); - - public static final Relationship FAILURE = new Relationship.Builder() - .name("failure") - .description("Failed to parse the Excel document") - .build(); - - private List descriptors; - - private Set relationships; - - @Override - protected void init(final ProcessorInitializationContext context) { - final List descriptors = new ArrayList<>(); - descriptors.add(DESIRED_SHEETS); - 
descriptors.add(ROWS_TO_SKIP); - descriptors.add(COLUMNS_TO_SKIP); - descriptors.add(FORMAT_VALUES); - - descriptors.add(CSVUtils.CSV_FORMAT); - descriptors.add(CSVUtils.VALUE_SEPARATOR); - descriptors.add(CSVUtils.INCLUDE_HEADER_LINE); - descriptors.add(CSVUtils.QUOTE_CHAR); - descriptors.add(CSVUtils.ESCAPE_CHAR); - descriptors.add(CSVUtils.COMMENT_MARKER); - descriptors.add(CSVUtils.NULL_STRING); - descriptors.add(CSVUtils.TRIM_FIELDS); - descriptors.add(new PropertyDescriptor.Builder() - .fromPropertyDescriptor(CSVUtils.QUOTE_MODE) - .defaultValue(CSVUtils.QUOTE_NONE.getValue()) - .build()); - descriptors.add(CSVUtils.RECORD_SEPARATOR); - descriptors.add(CSVUtils.TRAILING_DELIMITER); - this.descriptors = Collections.unmodifiableList(descriptors); - - final Set relationships = new LinkedHashSet<>(); - relationships.add(ORIGINAL); - relationships.add(SUCCESS); - relationships.add(FAILURE); - this.relationships = Collections.unmodifiableSet(relationships); - } - - @Override - public Set getRelationships() { - return this.relationships; - } - - @Override - public final List getSupportedPropertyDescriptors() { - return descriptors; - } - - @Override - public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { - final FlowFile flowFile = session.get(); - if (flowFile == null) { - return; - } - - final Map desiredSheets = getDesiredSheets(context, flowFile); - final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean(); - final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, flowFile.getAttributes()); - - //Switch to 0 based index - final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1; - final List columnsToSkip = getColumnsToSkip(context, flowFile); - - try { - session.read(flowFile, inputStream -> { - try (Workbook workbook = StreamingReader.builder() - .rowCacheSize(100) - .bufferSize(4096) - .setReadStyles(formatValues) - 
.open(inputStream)) { - - if (!desiredSheets.isEmpty()) { - desiredSheets.keySet().forEach(desiredSheet -> workbook.forEach(sheet -> { - if (sheet.getSheetName().equalsIgnoreCase(desiredSheet)) { - ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName()); - handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat); - desiredSheets.put(desiredSheet, Boolean.TRUE); - } - })); - - String sheetsNotFound = getSheetsNotFound(desiredSheets); - if (!sheetsNotFound.isEmpty()) { - getLogger().warn("Excel sheet(s) not found: {}", sheetsNotFound); - } - } else { - workbook.forEach(sheet -> { - ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName()); - handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat); - }); - } - } catch (ParseException | OpenException | ReadException e) { - if (e.getCause() instanceof InvalidFormatException) { - String msg = "Only .xlsx Excel 2007 OOXML files are supported"; - getLogger().error(msg, e); - throw new UnsupportedOperationException(msg, e); - } - getLogger().error("Error occurred while processing Excel document metadata", e); - } - }); - - session.transfer(flowFile, ORIGINAL); - - } catch (RuntimeException ex) { - getLogger().error("Failed to process incoming Excel document. 
" + ex.getMessage(), ex); - FlowFile failedFlowFile = session.putAttribute(flowFile, - ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage()); - session.transfer(failedFlowFile, FAILURE); - } - } - - private List getColumnsToSkip(final ProcessContext context, FlowFile flowFile) { - final String[] columnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP) - .evaluateAttributeExpressions(flowFile).getValue(), ","); - - if (columnsToSkip != null) { - try { - return Arrays.stream(columnsToSkip) - .map(columnToSkip -> Integer.parseInt(columnToSkip) - 1) - .collect(Collectors.toList()); - } catch (NumberFormatException e) { - throw new ProcessException("Invalid column in Columns to Skip list.", e); - } - } - - return new ArrayList<>(); - } - - private Map getDesiredSheets(final ProcessContext context, FlowFile flowFile) { - final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue(); - if (desiredSheetsDelimited != null) { - String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER); - if (desiredSheets != null) { - return Arrays.stream(desiredSheets) - .collect(Collectors.toMap(key -> key, value -> Boolean.FALSE)); - } else { - getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found."); - } - } - - return new HashMap<>(); - } - - /** - * Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile. - * - * @param session The NiFi ProcessSession instance for the current invocation. 
- */ - private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final Sheet sheet, ExcelSheetReadConfig readConfig, - CSVFormat csvFormat) { - - FlowFile ff = session.create(originalParentFF); - final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat); - try { - ff = session.write(ff, out -> { - PrintStream outPrint = new PrintStream(out, false, StandardCharsets.UTF_8); - sheetHandler.setOutput(outPrint); - sheet.forEach(row -> { - sheetHandler.startRow(row.getRowNum()); - row.forEach(sheetHandler::cell); - sheetHandler.endRow(); - }); - sheetHandler.close(); - }); - - ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName()); - ff = session.putAttribute(ff, ROW_NUM, Long.toString(sheetHandler.getRowCount())); - - if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) { - ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key())); - } else { - ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME); - } - - //Update the CoreAttributes.FILENAME to have the .csv extension now. 
Also update MIME.TYPE - ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), - ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName())); - ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); - - session.transfer(ff, SUCCESS); - - } catch (RuntimeException e) { - ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage()); - session.transfer(ff, FAILURE); - } - } - - private String getSheetsNotFound(Map desiredSheets) { - return desiredSheets.entrySet().stream() - .filter(entry -> !entry.getValue()) - .map(Map.Entry::getKey) - .collect(Collectors.joining(",")); - } - - /** - * Uses the com.github.pjfanning streaming cell implementation to - * do most of the work of parsing the contents of the Excel sheet - * and outputs the contents as a (basic) CSV. - */ - private class SheetToCSV { - private final ExcelSheetReadConfig readConfig; - CSVFormat csvFormat; - private boolean firstCellOfRow; - private boolean skipRow; - private int currentRow = -1; - private int currentCol = -1; - private int rowCount = 0; - private int skippedColumns = 0; - private CSVPrinter printer; - private boolean firstRow = false; - private ArrayList fieldValues; - - public int getRowCount() { - return rowCount; - } - - public void setOutput(PrintStream output) { - final OutputStreamWriter streamWriter = new OutputStreamWriter(output, StandardCharsets.UTF_8); - - try { - printer = new CSVPrinter(streamWriter, csvFormat); - } catch (IOException e) { - throw new ProcessException("Failed to create CSV Printer.", e); - } - } - - public SheetToCSV(ExcelSheetReadConfig readConfig, CSVFormat csvFormat) { - this.readConfig = readConfig; - this.csvFormat = csvFormat; - } - - public void startRow(int rowNum) { - if (rowNum <= readConfig.getOverrideFirstRow()) { - skipRow = true; - return; - } - - // Prepare for this row - skipRow = false; - 
firstCellOfRow = true; - firstRow = currentRow == -1; - currentRow = rowNum; - currentCol = -1; - fieldValues = new ArrayList<>(); - } - - public void endRow() { - if(skipRow) { - return; - } - - if(firstRow) { - readConfig.setLastColumn(currentCol); - } - - //if there was no data in this row, don't write it - if(fieldValues.stream() - .noneMatch(string -> string != null && !string.isEmpty())) { - return; - } - - // Ensure the correct number of columns - int columnsToAdd = (readConfig.getLastColumn() - currentCol) - readConfig.getColumnsToSkip().size(); - for (int i = 0; i < columnsToAdd; i++) { - fieldValues.add(null); - } - - try { - printer.printRecord(fieldValues); - } catch (IOException e) { - getLogger().warn("Print Record failed", e); - } - - rowCount++; - } - - public void cell(Cell cell) { - if (skipRow) { - return; - } - - // Did we miss any cells? - int thisCol = cell.getColumnIndex(); - - //Use the first row of the file to decide on the area of data to export - if (firstRow && firstCellOfRow) { - readConfig.setFirstColumn(thisCol); - } - - //if this cell falls outside our area, or has been explicitly marked as a skipped column, return and don't write it out. - if (!firstRow && (thisCol < readConfig.getFirstColumn() || thisCol > readConfig.getLastColumn())) { - return; - } - - if (readConfig.getColumnsToSkip().contains(thisCol)) { - skippedColumns++; - return; - } - - int missedCols = (thisCol - readConfig.getFirstColumn()) - (currentCol - readConfig.getFirstColumn()) - 1; - if (firstCellOfRow) { - missedCols = (thisCol - readConfig.getFirstColumn()); - } - - missedCols -= skippedColumns; - - if (firstCellOfRow) { - firstCellOfRow = false; - } - - for (int i = 0; i < missedCols; i++) { - fieldValues.add(null); - } - currentCol = thisCol; - - String stringCellValue = cell.getStringCellValue(); - fieldValues.add(stringCellValue != null && !stringCellValue.isEmpty() ? 
stringCellValue : null); - - skippedColumns = 0; - } - - public void close() throws IOException { - printer.close(); - } - } - - /** - * Takes the original input filename and updates it by removing the file extension and replacing it with - * the .csv extension. - * - * @param origFileName Original filename from the input file. - * @return The new filename with the .csv extension that should be place in the output flowfile's attributes - */ - private String updateFilenameToCSVExtension(String nifiUUID, String origFileName, String sheetName) { - - StringBuilder stringBuilder = new StringBuilder(); - - if (StringUtils.isNotEmpty(origFileName)) { - String ext = FilenameUtils.getExtension(origFileName); - if (StringUtils.isNotEmpty(ext)) { - stringBuilder.append(StringUtils.replace(origFileName, ("." + ext), "")); - } else { - stringBuilder.append(origFileName); - } - } else { - stringBuilder.append(nifiUUID); - } - - stringBuilder.append("_"); - stringBuilder.append(sheetName); - stringBuilder.append("."); - stringBuilder.append("csv"); - - return stringBuilder.toString(); - } - - private static class ExcelSheetReadConfig { - public String getSheetName() { - return sheetName; - } - - public int getFirstColumn() { - return firstColumn; - } - - public void setFirstColumn(int value) { - this.firstColumn = value; - } - - public int getLastColumn() { - return lastColumn; - } - - public void setLastColumn(int lastColumn) { - this.lastColumn = lastColumn; - } - - public int getOverrideFirstRow() { - return overrideFirstRow; - } - - public List getColumnsToSkip() { - return columnsToSkip; - } - - private int firstColumn; - private int lastColumn; - private final int overrideFirstRow; - private final String sheetName; - private final List columnsToSkip; - - public ExcelSheetReadConfig(List columnsToSkip, int overrideFirstRow, String sheetName) { - - this.sheetName = sheetName; - this.columnsToSkip = columnsToSkip; - this.overrideFirstRow = overrideFirstRow; - } - } -} diff 
--git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor deleted file mode 100644 index 43baa0b62f..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html deleted file mode 100644 index fbefa08015..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/resources/docs/org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor/additionalDetails.html +++ /dev/null @@ -1,97 +0,0 @@ - - - - - - ConvertExcelToCSVProcessor - - - - - -

How it extracts CSV data from a sheet

-

- ConvertExcelToCSVProcessor extracts CSV data with the following rules: -

-
    -
  • Find the first cell which has a value in it (the FirstCell).
  • -
  • Scan cells in the first row, starting from the FirstCell, - until it reaches a cell after which no cell with a value can be found in the row (the FirstRowLastCell).
  • -
  • Process the 2nd row and later, from the column of FirstCell to the column of FirstRowLastCell.
  • -
  • If a row does not have any cell that has a value, then the row is ignored.
  • -
- -

- As an example, the sheet shown below will be: -

- - - - - - - - - - - - - - - - -
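row | A | B | C | D | E | F | G
1   |   |   |   |   |   |   |
2   |   |   | x | y | z |   |
3   |   |   | 1 |   |   |   |
4   | 2 |   |   | 3 |   |   |
5   |   |   |   |   | 4 |   |
6   |   |   | 5 | 6 | 7 |   |
7   |   |   |   |   |   | 8 |
8   |   |   |   |   |   |   |
9   |   |   |   |   | 9 |   |
10  |   |   |   |   |   |   |
11  |   |   |   |   |   |   |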
- -

- converted to the following CSV: -

- -
-x,y,z
-1,,
-,3,
-,,4
-5,6,7
-,,9
-
- -
    -
  • C2(x) is the FirstCell, and E2(z) is the FirstRowLastCell.
  • -
  • A4(2) is ignored because it is out of range. So is F7(8).
  • -
  • Rows 7 and 8 are ignored because they do not have a valid cell.
  • -
  • It is important to have a header row as shown in the example to define the data area, - especially when a sheet includes empty cells.
  • -
- - - \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java deleted file mode 100644 index 41b48921e8..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.nifi.processors.poi; - -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.text.DecimalFormatSymbols; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.nifi.csv.CSVUtils; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.util.LogMessage; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class ConvertExcelToCSVProcessorTest { - - private TestRunner testRunner; - - @BeforeEach - public void init() { - testRunner = TestRunners.newTestRunner(ConvertExcelToCSVProcessor.class); - } - - @Test - public void testMultipleSheetsGeneratesMultipleFlowFiles() throws IOException { - - Map attributes = new HashMap<>(); - attributes.put("test", "attribute"); - - final URL resourceUrl = getClass().getResource("/TwoSheets.xlsx"); - assertNotNull(resourceUrl); - - testRunner.enqueue(new File(resourceUrl.getPath()).toPath(), attributes); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ffSheetA = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheetA = Long.parseLong(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(4, rowsSheetA); - 
assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetA")); - assertEquals("TwoSheets.xlsx", ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME)); - - //Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present - assertTrue(ffSheetA.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetA.csv")); - assertEquals("attribute", ffSheetA.getAttribute("test")); - - MockFlowFile ffSheetB = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1); - long rowsSheetB = Long.parseLong(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(3, rowsSheetB); - assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetB")); - assertEquals("TwoSheets.xlsx", ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME)); - - //Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present - assertTrue(ffSheetB.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetB.csv")); - assertEquals("attribute", ffSheetB.getAttribute("test")); - - } - - @Test - public void testDataFormatting() { - testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx")); - - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "false"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - 
ff.assertContentEquals("Numbers,Timestamps,Money\n" + - "1234.4559999999999,42736.5,123.45\n" + - "1234.4559999999999,42736.5,123.45\n" + - "1234.4559999999999,42736.5,123.45\n" + - "1234.4559999999999,42736.5,1023.45\n" + - "1234.4559999999999,42736.5,1023.45\n" + - "987654321,42736.5,1023.45\n" + - "987654321,,\n" + - "987654321,,\n"); - } - - @Test - public void testQuoting() { - testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx")); - - testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_MINIMAL); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - char decimalSeparator = decimalFormatSymbols.getDecimalSeparator(); - char groupingSeparator = decimalFormatSymbols.getGroupingSeparator(); - ff.assertContentEquals(("Numbers,Timestamps,Money\n" + - addQuotingIfNeeded(String.format("1234%1$s456", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "," + - addQuotingIfNeeded(String.format("$ 123%1$s45", decimalSeparator)) + "\n" + - addQuotingIfNeeded(String.format("1234%1$s46", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "," + - addQuotingIfNeeded(String.format("£ 123%1$s45", decimalSeparator)) + "\n" + - addQuotingIfNeeded(String.format("1234%1$s5", decimalSeparator)) + ",\"" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, 
yyyy").format(localDt) + "\"," + - addQuotingIfNeeded(String.format("¥ 123%1$s45", decimalSeparator)) + "\n" + - addQuotingIfNeeded(String.format("1%2$s234%1$s46", decimalSeparator, groupingSeparator)) + "," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "," + - addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" + - addQuotingIfNeeded(String.format("1%2$s234%1$s4560", decimalSeparator, groupingSeparator)) + "," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "," + - addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" + - addQuotingIfNeeded(String.format("9%1$s88E+08", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "," + - addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" + - addQuotingIfNeeded(String.format("9%1$s877E+08", decimalSeparator)) + ",,\n" + - addQuotingIfNeeded(String.format("9%1$s8765E+08", decimalSeparator)) + ",,\n").replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - /** - * Workaround for interaction between {@link DecimalFormatSymbols} and use of custom {@link java.util.Locale}. - */ - private static String getExponentSeparator(final DecimalFormatSymbols decimalFormatSymbols) { - final String exponentSeparator = decimalFormatSymbols.getExponentSeparator(); - return (exponentSeparator.equals("e") ? 
"e" : exponentSeparator + "+"); - } - - @Test - public void testSkipRows() { - testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx")); - - testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "2"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(7, rowsSheet, "Row count does match expected value."); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? 
"\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" + - "1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\, yyyy").format(localDt) + ",¥ 123%1$s45\n" + - "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" + - "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08,,\n" + - "9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testSkipRowsWithEL() { - Map attributes = new HashMap<>(); - attributes.put("rowsToSkip", "2"); - testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"), attributes); - - testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "${rowsToSkip}"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(7, rowsSheet, "Row count does match expected value."); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? 
"\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" + - "1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\, yyyy").format(localDt) + ",¥ 123%1$s45\n" + - "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" + - "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08,,\n" + - "9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testSkipColumns() throws Exception { - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath()); - - testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "2"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? 
"\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("Numbers,Money\n" + - "1234%1$s456,$ 123%1$s45\n" + - "1234%1$s46,£ 123%1$s45\n" + - "1234%1$s5,¥ 123%1$s45\n" + - "1%2$s234%1$s46,$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" + - "9%1$s88E+08,¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08,\n" + - "9%1$s8765E+08,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testSkipColumnsWithEL() throws Exception { - Map attributes = new HashMap<>(); - attributes.put("columnsToSkip", "2"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); - - testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "${columnsToSkip}"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? 
"\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("Numbers,Money\n" + - "1234%1$s456,$ 123%1$s45\n" + - "1234%1$s46,£ 123%1$s45\n" + - "1234%1$s5,¥ 123%1$s45\n" + - "1%2$s234%1$s46,$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" + - "9%1$s88E+08,¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08,\n" + - "9%1$s8765E+08,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testCustomDelimiters() throws Exception { - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath()); - - testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "|"); - testRunner.setProperty(CSVUtils.RECORD_SEPARATOR, "\\r\\n"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String valueSeparator = testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue(); - String decimalSeparator = (String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator)) - ? ("\\" + decimalFormatSymbols.getDecimalSeparator()) : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator) - ? 
"\\" + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("Numbers|Timestamps|Money\r\n" + - "1234%1$s456|" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\r\n" + - "1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "|£ 123%1$s45\r\n" + - "1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "|¥ 123%1$s45\r\n" + - "1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\r\n" + - "1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "|£ 1%2$s023%1$s45\r\n" + - "9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\r\n" + - "9%1$s877E+08||\r\n" + - "9%1$s8765E+08||\r\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testCustomValueSeparatorWithEL() throws Exception { - Map attributes = new HashMap<>(); - attributes.put("csv.delimiter", "|"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); - - testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String 
valueSeparator = testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue(); - String decimalSeparator = (String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator)) - ? ("\\" + decimalFormatSymbols.getDecimalSeparator()) : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator) - ? "\\" + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("Numbers|Timestamps|Money\n" + - "1234%1$s456|" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\n" + - "1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "|£ 123%1$s45\n" + - "1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "|¥ 123%1$s45\n" + - "1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "|£ 1%2$s023%1$s45\n" + - "9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08||\n" + - "9%1$s8765E+08||\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testCustomQuoteCharWithEL() throws Exception { - Map attributes = new HashMap<>(); - attributes.put("csv.quote", "'"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); - - testRunner.setProperty(CSVUtils.QUOTE_CHAR, "${csv.quote}"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_ALL); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - 
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - String quoteCharValue = testRunner.getProcessContext().getProperty(CSVUtils.QUOTE_CHAR).evaluateAttributeExpressions(ff).getValue(); - DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - char decimalSeparator = decimalFormatSymbols.getDecimalSeparator(); - char groupingSeparator = decimalFormatSymbols.getGroupingSeparator(); - ff.assertContentEquals(("'Numbers','Timestamps','Money'\n" + - addQuotingIfNeeded(String.format("1234%1$s456", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + quoteCharValue + "," + - addQuotingIfNeeded(String.format("$ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("1234%1$s46", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + quoteCharValue + "," + - addQuotingIfNeeded(String.format("£ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("1234%1$s5", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + quoteCharValue + "," + - addQuotingIfNeeded(String.format("¥ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("1%2$s234%1$s46", decimalSeparator, groupingSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + quoteCharValue + "," + - 
addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("1%2$s234%1$s4560", decimalSeparator, groupingSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + quoteCharValue + "," + - addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("9%1$s88E+08", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue + - DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + quoteCharValue + "," + - addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" + - addQuotingIfNeeded(String.format("9%1$s877E+08", decimalSeparator), quoteCharValue, true) + ",,\n" + - addQuotingIfNeeded(String.format("9%1$s8765E+08", decimalSeparator), quoteCharValue, true) + ",,\n").replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - @Test - public void testCustomEscapeCharWithEL() throws Exception { - Map attributes = new HashMap<>(); - attributes.put("csv.escape", "^"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); - - testRunner.setProperty(CSVUtils.ESCAPE_CHAR, "${csv.escape}"); - testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); - - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(9, rowsSheet); - - LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); - 
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance(); - String escapeCharValue = testRunner.getProcessContext().getProperty(CSVUtils.ESCAPE_CHAR).evaluateAttributeExpressions(ff).getValue(); - String decimalSeparator = String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(",") - ? escapeCharValue + decimalFormatSymbols.getDecimalSeparator() : String.valueOf(decimalFormatSymbols.getDecimalSeparator()); - String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(",") - ? escapeCharValue + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator()); - ff.assertContentEquals(String.format("Numbers,Timestamps,Money\n" + - "1234%1$s456," + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + ",$ 123%1$s45\n" + - "1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" + - "1234%1$s5," + DateTimeFormatter.ofPattern(String.format("EEEE%1$s, MMMM dd%1$s, yyyy", escapeCharValue)).format(localDt) + ",¥ 123%1$s45\n" + - "1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" + - "1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" + - "9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" + - "9%1$s877E+08,,\n" + - "9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols))); - } - - /** - * Validates that all sheets in the Excel document are exported. - * - * @throws Exception - * Any exception thrown during execution. 
- */ - @Test - public void testProcessAllSheets() throws Exception { - - testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(10, l); - - testRunner.clearProvenanceEvents(); - testRunner.clearTransferState(); - - testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(4, l); - - ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1); - l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(3, l); - } - - /** - * Validates that the manually specified sheet is exported from the Excel document. - * - * @throws Exception - * Any exception thrown during execution. 
- */ - @Test - public void testProcessASpecificSheetThatDoesExist() throws Exception { - - testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Scorecard"); - testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(10, l); - } - - /** - * Tests for a syntactically valid Excel XSSF document with a manually specified Excel sheet that does not exist. - * In this scenario only the Original relationship should be invoked. - * - * @throws Exception - * Any exception thrown during execution. - */ - @Test - public void testNonExistantSpecifiedSheetName() throws Exception { - - testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "NopeIDoNotExist"); - testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0); //We aren't expecting any output to success here because the sheet doesn't exist - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - assertFalse(testRunner.getLogger().getWarnMessages().isEmpty()); - } - - /** - * Validates that a sheet contains blank cells can be converted to a CSV without missing columns. - * - * @throws Exception - * Any exception thrown during execution. 
- */ - @Test - public void testProcessASheetWithBlankCells() throws Exception { - - testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Sheet1"); - testRunner.enqueue(new File("src/test/resources/with-blank-cells.xlsx").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); - - MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); - long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); - assertEquals(8, l); - - ff.assertContentEquals(new File("src/test/resources/with-blank-cells.csv")); - } - - /** - * Tests for graceful handling and error messaging of unsupported .XLS files. - */ - @Test - public void testHandleUnsupportedXlsFile() throws Exception { - - testRunner.enqueue(new File("src/test/resources/Unsupported.xls").toPath()); - testRunner.run(); - - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 0); - testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1); - - List errorMessages = testRunner.getLogger().getErrorMessages(); - assertEquals(1, errorMessages.size()); - String messageText = errorMessages.get(0).getMsg(); - assertTrue(messageText.contains("Excel") && messageText.contains("OLE2")); - } - - private String addQuotingIfNeeded(String csvField) { - return addQuotingIfNeeded(csvField, "\"", false); - } - - private String addQuotingIfNeeded(String csvField, String csvQuote, boolean force) { - return csvField.contains(",") || force ? 
String.format("%2$s%1$s%2$s", csvField, csvQuote) : csvField; - } -} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx deleted file mode 100644 index 230ad0e669..0000000000 Binary files a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/CollegeScorecard.xlsx and /dev/null differ diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx deleted file mode 100644 index f4977b1952..0000000000 Binary files a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/TwoSheets.xlsx and /dev/null differ diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls deleted file mode 100644 index 6023329ba9..0000000000 Binary files a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/Unsupported.xls and /dev/null differ diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx deleted file mode 100644 index a9428e2bda..0000000000 Binary files a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/dataformatting.xlsx and /dev/null differ diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml deleted file mode 100644 index 5afbc8ea75..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/logback-test.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - %-4r [%t] 
%-5p %c - %m%n - - - - - - - - - - - - diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv deleted file mode 100644 index ff3f706b06..0000000000 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.csv +++ /dev/null @@ -1,8 +0,0 @@ -A,B,C,D -A1,,, -,B2,C2, -,,C3, -,,C4,D4 -A5,,C5,D5 -A6,B6,,D6 -A7,B7,C7,D7 diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx deleted file mode 100644 index a9482460aa..0000000000 Binary files a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/resources/with-blank-cells.xlsx and /dev/null differ diff --git a/nifi-nar-bundles/nifi-poi-bundle/pom.xml b/nifi-nar-bundles/nifi-poi-bundle/pom.xml index d4467f087d..8b187fd9c4 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/pom.xml +++ b/nifi-nar-bundles/nifi-poi-bundle/pom.xml @@ -28,7 +28,6 @@ 5.2.3 - nifi-poi-processors nifi-poi-nar nifi-poi-services