mirror of
https://github.com/apache/nifi.git
synced 2025-02-07 18:48:51 +00:00
NIFI-12100 Removed the ConvertExcelToCSVProcessor
This closes #7802 Signed-off-by: Mike Thomsen <mthomsen@apache.org>
This commit is contained in:
parent
a74c411079
commit
e9b532bd32
@ -30,11 +30,6 @@
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-poi-processors</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-poi-services</artifactId>
|
||||
|
@ -1,76 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-poi-bundle</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>nifi-poi-processors</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes combine.children="append">
|
||||
<exclude>src/test/resources/with-blank-cells.csv</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.pjfanning</groupId>
|
||||
<artifactId>excel-streaming-reader</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-to-slf4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-utils</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-standard-record-utils</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-mock</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
@ -1,534 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.poi;
|
||||
|
||||
import com.github.pjfanning.xlsx.StreamingReader;
|
||||
import com.github.pjfanning.xlsx.exceptions.OpenException;
|
||||
import com.github.pjfanning.xlsx.exceptions.ParseException;
|
||||
import com.github.pjfanning.xlsx.exceptions.ReadException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVPrinter;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.csv.CSVUtils;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
|
||||
|
||||
@Tags({"excel", "csv", "poi"})
@CapabilityDescription("Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel " +
        "document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file " +
        "where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx " +
        "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted " +
        "CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
// The error attribute is written as ConvertExcelToCSVProcessor.class.getName() + ".error", i.e. the fully
// qualified class name followed by ".error"; the documented attribute name below has to match that value.
@WritesAttributes({
        @WritesAttribute(attribute = "sheetname", description = "The name of the Excel sheet that this particular row of data came from in the Excel document"),
        @WritesAttribute(attribute = "numrows", description = "The number of rows in this Excel Sheet"),
        @WritesAttribute(attribute = "sourcefilename", description = "The name of the Excel document file that this data originated from"),
        @WritesAttribute(attribute = "org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor.error",
                description = "Error message that was encountered on a per Excel sheet basis. This attribute is" +
                " only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end" +
                " user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
|
||||
public class ConvertExcelToCSVProcessor extends AbstractProcessor {
|
||||
|
||||
private static final String CSV_MIME_TYPE = "text/csv";
|
||||
public static final String SHEET_NAME = "sheetname";
|
||||
public static final String ROW_NUM = "numrows";
|
||||
public static final String SOURCE_FILE_NAME = "sourcefilename";
|
||||
private static final String DESIRED_SHEETS_DELIMITER = ",";
|
||||
private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";
|
||||
|
||||
public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor
|
||||
.Builder().name("extract-sheets")
|
||||
.displayName("Sheets to Extract")
|
||||
.description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property" +
|
||||
" is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not " +
|
||||
"specified in this value will be ignored. A bulletin will be generated if a specified sheet(s) are not found.")
|
||||
.required(false)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor
|
||||
.Builder().name("excel-extract-first-row")
|
||||
.displayName("Number of Rows to Skip")
|
||||
.description("The row number of the first row to start processing."
|
||||
+ "Use this to skip over rows of data at the top of your worksheet that are not part of the dataset."
|
||||
+ "Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.")
|
||||
.required(true)
|
||||
.defaultValue("0")
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor
|
||||
.Builder().name("excel-extract-column-to-skip")
|
||||
.displayName("Columns To Skip")
|
||||
.description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. "
|
||||
+ "Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.")
|
||||
.required(false)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder()
|
||||
.name("excel-format-values")
|
||||
.displayName("Format Cell Values")
|
||||
.description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.")
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("false")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
public static final Relationship ORIGINAL = new Relationship.Builder()
|
||||
.name("original")
|
||||
.description("Original Excel document received by this processor")
|
||||
.build();
|
||||
|
||||
public static final Relationship SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("Excel data converted to csv")
|
||||
.build();
|
||||
|
||||
public static final Relationship FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("Failed to parse the Excel document")
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> descriptors;
|
||||
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(final ProcessorInitializationContext context) {
|
||||
final List<PropertyDescriptor> descriptors = new ArrayList<>();
|
||||
descriptors.add(DESIRED_SHEETS);
|
||||
descriptors.add(ROWS_TO_SKIP);
|
||||
descriptors.add(COLUMNS_TO_SKIP);
|
||||
descriptors.add(FORMAT_VALUES);
|
||||
|
||||
descriptors.add(CSVUtils.CSV_FORMAT);
|
||||
descriptors.add(CSVUtils.VALUE_SEPARATOR);
|
||||
descriptors.add(CSVUtils.INCLUDE_HEADER_LINE);
|
||||
descriptors.add(CSVUtils.QUOTE_CHAR);
|
||||
descriptors.add(CSVUtils.ESCAPE_CHAR);
|
||||
descriptors.add(CSVUtils.COMMENT_MARKER);
|
||||
descriptors.add(CSVUtils.NULL_STRING);
|
||||
descriptors.add(CSVUtils.TRIM_FIELDS);
|
||||
descriptors.add(new PropertyDescriptor.Builder()
|
||||
.fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
|
||||
.defaultValue(CSVUtils.QUOTE_NONE.getValue())
|
||||
.build());
|
||||
descriptors.add(CSVUtils.RECORD_SEPARATOR);
|
||||
descriptors.add(CSVUtils.TRAILING_DELIMITER);
|
||||
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||
|
||||
final Set<Relationship> relationships = new LinkedHashSet<>();
|
||||
relationships.add(ORIGINAL);
|
||||
relationships.add(SUCCESS);
|
||||
relationships.add(FAILURE);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return this.relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return descriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
final FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final Map<String, Boolean> desiredSheets = getDesiredSheets(context, flowFile);
|
||||
final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
|
||||
final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, flowFile.getAttributes());
|
||||
|
||||
//Switch to 0 based index
|
||||
final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1;
|
||||
final List<Integer> columnsToSkip = getColumnsToSkip(context, flowFile);
|
||||
|
||||
try {
|
||||
session.read(flowFile, inputStream -> {
|
||||
try (Workbook workbook = StreamingReader.builder()
|
||||
.rowCacheSize(100)
|
||||
.bufferSize(4096)
|
||||
.setReadStyles(formatValues)
|
||||
.open(inputStream)) {
|
||||
|
||||
if (!desiredSheets.isEmpty()) {
|
||||
desiredSheets.keySet().forEach(desiredSheet -> workbook.forEach(sheet -> {
|
||||
if (sheet.getSheetName().equalsIgnoreCase(desiredSheet)) {
|
||||
ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
|
||||
handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
|
||||
desiredSheets.put(desiredSheet, Boolean.TRUE);
|
||||
}
|
||||
}));
|
||||
|
||||
String sheetsNotFound = getSheetsNotFound(desiredSheets);
|
||||
if (!sheetsNotFound.isEmpty()) {
|
||||
getLogger().warn("Excel sheet(s) not found: {}", sheetsNotFound);
|
||||
}
|
||||
} else {
|
||||
workbook.forEach(sheet -> {
|
||||
ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
|
||||
handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat);
|
||||
});
|
||||
}
|
||||
} catch (ParseException | OpenException | ReadException e) {
|
||||
if (e.getCause() instanceof InvalidFormatException) {
|
||||
String msg = "Only .xlsx Excel 2007 OOXML files are supported";
|
||||
getLogger().error(msg, e);
|
||||
throw new UnsupportedOperationException(msg, e);
|
||||
}
|
||||
getLogger().error("Error occurred while processing Excel document metadata", e);
|
||||
}
|
||||
});
|
||||
|
||||
session.transfer(flowFile, ORIGINAL);
|
||||
|
||||
} catch (RuntimeException ex) {
|
||||
getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), ex);
|
||||
FlowFile failedFlowFile = session.putAttribute(flowFile,
|
||||
ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
|
||||
session.transfer(failedFlowFile, FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
private List<Integer> getColumnsToSkip(final ProcessContext context, FlowFile flowFile) {
|
||||
final String[] columnsToSkip = StringUtils.split(context.getProperty(COLUMNS_TO_SKIP)
|
||||
.evaluateAttributeExpressions(flowFile).getValue(), ",");
|
||||
|
||||
if (columnsToSkip != null) {
|
||||
try {
|
||||
return Arrays.stream(columnsToSkip)
|
||||
.map(columnToSkip -> Integer.parseInt(columnToSkip) - 1)
|
||||
.collect(Collectors.toList());
|
||||
} catch (NumberFormatException e) {
|
||||
throw new ProcessException("Invalid column in Columns to Skip list.", e);
|
||||
}
|
||||
}
|
||||
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
private Map<String, Boolean> getDesiredSheets(final ProcessContext context, FlowFile flowFile) {
|
||||
final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
|
||||
if (desiredSheetsDelimited != null) {
|
||||
String[] desiredSheets = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
|
||||
if (desiredSheets != null) {
|
||||
return Arrays.stream(desiredSheets)
|
||||
.collect(Collectors.toMap(key -> key, value -> Boolean.FALSE));
|
||||
} else {
|
||||
getLogger().debug("Excel document was parsed but no sheets with the specified desired names were found.");
|
||||
}
|
||||
}
|
||||
|
||||
return new HashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles an individual Excel sheet from the entire Excel document. Each sheet will result in an individual flowfile.
|
||||
*
|
||||
* @param session The NiFi ProcessSession instance for the current invocation.
|
||||
*/
|
||||
private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, final Sheet sheet, ExcelSheetReadConfig readConfig,
|
||||
CSVFormat csvFormat) {
|
||||
|
||||
FlowFile ff = session.create(originalParentFF);
|
||||
final SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
|
||||
try {
|
||||
ff = session.write(ff, out -> {
|
||||
PrintStream outPrint = new PrintStream(out, false, StandardCharsets.UTF_8);
|
||||
sheetHandler.setOutput(outPrint);
|
||||
sheet.forEach(row -> {
|
||||
sheetHandler.startRow(row.getRowNum());
|
||||
row.forEach(sheetHandler::cell);
|
||||
sheetHandler.endRow();
|
||||
});
|
||||
sheetHandler.close();
|
||||
});
|
||||
|
||||
ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
|
||||
ff = session.putAttribute(ff, ROW_NUM, Long.toString(sheetHandler.getRowCount()));
|
||||
|
||||
if (StringUtils.isNotEmpty(originalParentFF.getAttribute(CoreAttributes.FILENAME.key()))) {
|
||||
ff = session.putAttribute(ff, SOURCE_FILE_NAME, originalParentFF.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
} else {
|
||||
ff = session.putAttribute(ff, SOURCE_FILE_NAME, UNKNOWN_SHEET_NAME);
|
||||
}
|
||||
|
||||
//Update the CoreAttributes.FILENAME to have the .csv extension now. Also update MIME.TYPE
|
||||
ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()),
|
||||
ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
|
||||
ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
|
||||
|
||||
session.transfer(ff, SUCCESS);
|
||||
|
||||
} catch (RuntimeException e) {
|
||||
ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
|
||||
session.transfer(ff, FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
private String getSheetsNotFound(Map<String, Boolean> desiredSheets) {
|
||||
return desiredSheets.entrySet().stream()
|
||||
.filter(entry -> !entry.getValue())
|
||||
.map(Map.Entry::getKey)
|
||||
.collect(Collectors.joining(","));
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the com.github.pjfanning streaming cell implementation to
|
||||
* do most of the work of parsing the contents of the Excel sheet
|
||||
* and outputs the contents as a (basic) CSV.
|
||||
*/
|
||||
private class SheetToCSV {
|
||||
private final ExcelSheetReadConfig readConfig;
|
||||
CSVFormat csvFormat;
|
||||
private boolean firstCellOfRow;
|
||||
private boolean skipRow;
|
||||
private int currentRow = -1;
|
||||
private int currentCol = -1;
|
||||
private int rowCount = 0;
|
||||
private int skippedColumns = 0;
|
||||
private CSVPrinter printer;
|
||||
private boolean firstRow = false;
|
||||
private ArrayList<String> fieldValues;
|
||||
|
||||
public int getRowCount() {
|
||||
return rowCount;
|
||||
}
|
||||
|
||||
public void setOutput(PrintStream output) {
|
||||
final OutputStreamWriter streamWriter = new OutputStreamWriter(output, StandardCharsets.UTF_8);
|
||||
|
||||
try {
|
||||
printer = new CSVPrinter(streamWriter, csvFormat);
|
||||
} catch (IOException e) {
|
||||
throw new ProcessException("Failed to create CSV Printer.", e);
|
||||
}
|
||||
}
|
||||
|
||||
public SheetToCSV(ExcelSheetReadConfig readConfig, CSVFormat csvFormat) {
|
||||
this.readConfig = readConfig;
|
||||
this.csvFormat = csvFormat;
|
||||
}
|
||||
|
||||
public void startRow(int rowNum) {
|
||||
if (rowNum <= readConfig.getOverrideFirstRow()) {
|
||||
skipRow = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Prepare for this row
|
||||
skipRow = false;
|
||||
firstCellOfRow = true;
|
||||
firstRow = currentRow == -1;
|
||||
currentRow = rowNum;
|
||||
currentCol = -1;
|
||||
fieldValues = new ArrayList<>();
|
||||
}
|
||||
|
||||
public void endRow() {
|
||||
if(skipRow) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(firstRow) {
|
||||
readConfig.setLastColumn(currentCol);
|
||||
}
|
||||
|
||||
//if there was no data in this row, don't write it
|
||||
if(fieldValues.stream()
|
||||
.noneMatch(string -> string != null && !string.isEmpty())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure the correct number of columns
|
||||
int columnsToAdd = (readConfig.getLastColumn() - currentCol) - readConfig.getColumnsToSkip().size();
|
||||
for (int i = 0; i < columnsToAdd; i++) {
|
||||
fieldValues.add(null);
|
||||
}
|
||||
|
||||
try {
|
||||
printer.printRecord(fieldValues);
|
||||
} catch (IOException e) {
|
||||
getLogger().warn("Print Record failed", e);
|
||||
}
|
||||
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
public void cell(Cell cell) {
|
||||
if (skipRow) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Did we miss any cells?
|
||||
int thisCol = cell.getColumnIndex();
|
||||
|
||||
//Use the first row of the file to decide on the area of data to export
|
||||
if (firstRow && firstCellOfRow) {
|
||||
readConfig.setFirstColumn(thisCol);
|
||||
}
|
||||
|
||||
//if this cell falls outside our area, or has been explicitly marked as a skipped column, return and don't write it out.
|
||||
if (!firstRow && (thisCol < readConfig.getFirstColumn() || thisCol > readConfig.getLastColumn())) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (readConfig.getColumnsToSkip().contains(thisCol)) {
|
||||
skippedColumns++;
|
||||
return;
|
||||
}
|
||||
|
||||
int missedCols = (thisCol - readConfig.getFirstColumn()) - (currentCol - readConfig.getFirstColumn()) - 1;
|
||||
if (firstCellOfRow) {
|
||||
missedCols = (thisCol - readConfig.getFirstColumn());
|
||||
}
|
||||
|
||||
missedCols -= skippedColumns;
|
||||
|
||||
if (firstCellOfRow) {
|
||||
firstCellOfRow = false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missedCols; i++) {
|
||||
fieldValues.add(null);
|
||||
}
|
||||
currentCol = thisCol;
|
||||
|
||||
String stringCellValue = cell.getStringCellValue();
|
||||
fieldValues.add(stringCellValue != null && !stringCellValue.isEmpty() ? stringCellValue : null);
|
||||
|
||||
skippedColumns = 0;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
printer.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes the original input filename and updates it by removing the file extension and replacing it with
|
||||
* the .csv extension.
|
||||
*
|
||||
* @param origFileName Original filename from the input file.
|
||||
* @return The new filename with the .csv extension that should be place in the output flowfile's attributes
|
||||
*/
|
||||
private String updateFilenameToCSVExtension(String nifiUUID, String origFileName, String sheetName) {
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
|
||||
if (StringUtils.isNotEmpty(origFileName)) {
|
||||
String ext = FilenameUtils.getExtension(origFileName);
|
||||
if (StringUtils.isNotEmpty(ext)) {
|
||||
stringBuilder.append(StringUtils.replace(origFileName, ("." + ext), ""));
|
||||
} else {
|
||||
stringBuilder.append(origFileName);
|
||||
}
|
||||
} else {
|
||||
stringBuilder.append(nifiUUID);
|
||||
}
|
||||
|
||||
stringBuilder.append("_");
|
||||
stringBuilder.append(sheetName);
|
||||
stringBuilder.append(".");
|
||||
stringBuilder.append("csv");
|
||||
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
private static class ExcelSheetReadConfig {
|
||||
public String getSheetName() {
|
||||
return sheetName;
|
||||
}
|
||||
|
||||
public int getFirstColumn() {
|
||||
return firstColumn;
|
||||
}
|
||||
|
||||
public void setFirstColumn(int value) {
|
||||
this.firstColumn = value;
|
||||
}
|
||||
|
||||
public int getLastColumn() {
|
||||
return lastColumn;
|
||||
}
|
||||
|
||||
public void setLastColumn(int lastColumn) {
|
||||
this.lastColumn = lastColumn;
|
||||
}
|
||||
|
||||
public int getOverrideFirstRow() {
|
||||
return overrideFirstRow;
|
||||
}
|
||||
|
||||
public List<Integer> getColumnsToSkip() {
|
||||
return columnsToSkip;
|
||||
}
|
||||
|
||||
private int firstColumn;
|
||||
private int lastColumn;
|
||||
private final int overrideFirstRow;
|
||||
private final String sheetName;
|
||||
private final List<Integer> columnsToSkip;
|
||||
|
||||
public ExcelSheetReadConfig(List<Integer> columnsToSkip, int overrideFirstRow, String sheetName) {
|
||||
|
||||
this.sheetName = sheetName;
|
||||
this.columnsToSkip = columnsToSkip;
|
||||
this.overrideFirstRow = overrideFirstRow;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.processors.poi.ConvertExcelToCSVProcessor
|
@ -1,97 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>ConvertExcelToCSVProcessor</title>
|
||||
<style>
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table, th, td {
|
||||
border: 1px solid #ccc;
|
||||
}
|
||||
|
||||
td.r {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
td {
|
||||
width: 50px;
|
||||
padding: 5px;
|
||||
}
|
||||
</style>
|
||||
<link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h2>How it extracts CSV data from a sheet</h2>
|
||||
<p>
|
||||
ConvertExcelToCSVProcessor extracts CSV data with following rules:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Find the first cell which has a value in it (the FirstCell).</li>
|
||||
<li>Scan cells in the first row, starting from the FirstCell,
|
||||
until it reaches a cell after which no other cell with a value can be found in the row (the FirstRowLastCell).</li>
|
||||
<li>Process the 2nd row and later, from the column of FirstCell to the column of FirstRowLastCell.</li>
|
||||
<li>If a row does not have any cell that has a value, then the row is ignored.</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
As an example, the sheet shown below will be:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr><th>row </th><th>A</th><th>B</th><th>C</th><th>D</th><th>E</th><th>F</th><th>G</th></tr>
|
||||
<tr><td class="r"> 1</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 2</td><td> </td><td> </td><td>x</td><td>y</td><td>z</td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 3</td><td> </td><td> </td><td>1</td><td> </td><td> </td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 4</td><td>2</td><td> </td><td> </td><td>3</td><td> </td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 5</td><td> </td><td> </td><td> </td><td> </td><td>4</td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 6</td><td> </td><td> </td><td>5</td><td>6</td><td>7</td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 7</td><td> </td><td> </td><td> </td><td> </td><td> </td><td>8</td><td> </td></tr>
|
||||
<tr><td class="r"> 8</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 9</td><td> </td><td> </td><td> </td><td> </td><td>9</td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 10</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td></tr>
|
||||
<tr><td class="r"> 11</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
converted to following CSV:
|
||||
</p>
|
||||
|
||||
<pre>
|
||||
x,y,z
|
||||
1,,
|
||||
,3,
|
||||
,,4
|
||||
5,6,7
|
||||
,,9
|
||||
</pre>
|
||||
|
||||
<ul>
|
||||
<li>C2(x) is the FirstCell, and E2(z) is the FirstRowLastCell.</li>
|
||||
<li>A4(2) is ignored because it is out of range. So is F7(8).</li>
|
||||
<li>Rows 7 and 8 are ignored because they do not have a valid cell.</li>
|
||||
<li>It is important to have a header row as shown in the example to define data area,
|
||||
especially when a sheet includes empty cells.</li>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -1,578 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.poi;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.text.DecimalFormatSymbols;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.nifi.csv.CSVUtils;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.util.LogMessage;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class ConvertExcelToCSVProcessorTest {
|
||||
|
||||
private TestRunner testRunner;
|
||||
|
||||
@BeforeEach
|
||||
public void init() {
|
||||
testRunner = TestRunners.newTestRunner(ConvertExcelToCSVProcessor.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSheetsGeneratesMultipleFlowFiles() throws IOException {
|
||||
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("test", "attribute");
|
||||
|
||||
final URL resourceUrl = getClass().getResource("/TwoSheets.xlsx");
|
||||
assertNotNull(resourceUrl);
|
||||
|
||||
testRunner.enqueue(new File(resourceUrl.getPath()).toPath(), attributes);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ffSheetA = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheetA = Long.parseLong(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(4, rowsSheetA);
|
||||
assertTrue(ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetA"));
|
||||
assertEquals("TwoSheets.xlsx", ffSheetA.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME));
|
||||
|
||||
//Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
|
||||
assertTrue(ffSheetA.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetA.csv"));
|
||||
assertEquals("attribute", ffSheetA.getAttribute("test"));
|
||||
|
||||
MockFlowFile ffSheetB = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
|
||||
long rowsSheetB = Long.parseLong(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(3, rowsSheetB);
|
||||
assertTrue(ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SHEET_NAME).equalsIgnoreCase("TestSheetB"));
|
||||
assertEquals("TwoSheets.xlsx", ffSheetB.getAttribute(ConvertExcelToCSVProcessor.SOURCE_FILE_NAME));
|
||||
|
||||
//Since TestRunner.run() will create a random filename even if the attribute is set in enqueue manually we just check that "_{SHEETNAME}.csv is present
|
||||
assertTrue(ffSheetB.getAttribute(CoreAttributes.FILENAME.key()).endsWith("_TestSheetB.csv"));
|
||||
assertEquals("attribute", ffSheetB.getAttribute("test"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDataFormatting() {
|
||||
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "false");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
ff.assertContentEquals("Numbers,Timestamps,Money\n" +
|
||||
"1234.4559999999999,42736.5,123.45\n" +
|
||||
"1234.4559999999999,42736.5,123.45\n" +
|
||||
"1234.4559999999999,42736.5,123.45\n" +
|
||||
"1234.4559999999999,42736.5,1023.45\n" +
|
||||
"1234.4559999999999,42736.5,1023.45\n" +
|
||||
"987654321,42736.5,1023.45\n" +
|
||||
"987654321,,\n" +
|
||||
"987654321,,\n");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuoting() {
|
||||
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
|
||||
|
||||
testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_MINIMAL);
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
char decimalSeparator = decimalFormatSymbols.getDecimalSeparator();
|
||||
char groupingSeparator = decimalFormatSymbols.getGroupingSeparator();
|
||||
ff.assertContentEquals(("Numbers,Timestamps,Money\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s456", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "," +
|
||||
addQuotingIfNeeded(String.format("$ 123%1$s45", decimalSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s46", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "," +
|
||||
addQuotingIfNeeded(String.format("£ 123%1$s45", decimalSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s5", decimalSeparator)) + ",\"" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "\"," +
|
||||
addQuotingIfNeeded(String.format("¥ 123%1$s45", decimalSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1%2$s234%1$s46", decimalSeparator, groupingSeparator)) + "," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "," +
|
||||
addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1%2$s234%1$s4560", decimalSeparator, groupingSeparator)) + "," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "," +
|
||||
addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s88E+08", decimalSeparator)) + "," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "," +
|
||||
addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45", decimalSeparator, groupingSeparator)) + "\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s877E+08", decimalSeparator)) + ",,\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s8765E+08", decimalSeparator)) + ",,\n").replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Workaround for interaction between {@link DecimalFormatSymbols} and use of custom {@link java.util.Locale}.
|
||||
*/
|
||||
private static String getExponentSeparator(final DecimalFormatSymbols decimalFormatSymbols) {
|
||||
final String exponentSeparator = decimalFormatSymbols.getExponentSeparator();
|
||||
return (exponentSeparator.equals("e") ? "e" : exponentSeparator + "+");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipRows() {
|
||||
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"));
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "2");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(7, rowsSheet, "Row count does match expected value.");
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" +
|
||||
"1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\, yyyy").format(localDt) + ",¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08,,\n" +
|
||||
"9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipRowsWithEL() {
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("rowsToSkip", "2");
|
||||
testRunner.enqueue(getClass().getResourceAsStream("/dataformatting.xlsx"), attributes);
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "${rowsToSkip}");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(7, rowsSheet, "Row count does match expected value.");
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" +
|
||||
"1234%1$s5," + DateTimeFormatter.ofPattern("EEEE\\, MMMM dd\\, yyyy").format(localDt) + ",¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08,,\n" +
|
||||
"9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipColumns() throws Exception {
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "2");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("Numbers,Money\n" +
|
||||
"1234%1$s456,$ 123%1$s45\n" +
|
||||
"1234%1$s46,£ 123%1$s45\n" +
|
||||
"1234%1$s5,¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46,$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08,¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08,\n" +
|
||||
"9%1$s8765E+08,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSkipColumnsWithEL() throws Exception {
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("columnsToSkip", "2");
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "${columnsToSkip}");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String decimalSeparator = decimalFormatSymbols.getDecimalSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = decimalFormatSymbols.getGroupingSeparator() == ',' ? "\\," : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("Numbers,Money\n" +
|
||||
"1234%1$s456,$ 123%1$s45\n" +
|
||||
"1234%1$s46,£ 123%1$s45\n" +
|
||||
"1234%1$s5,¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46,$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560,£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08,¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08,\n" +
|
||||
"9%1$s8765E+08,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomDelimiters() throws Exception {
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
|
||||
|
||||
testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "|");
|
||||
testRunner.setProperty(CSVUtils.RECORD_SEPARATOR, "\\r\\n");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String valueSeparator = testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue();
|
||||
String decimalSeparator = (String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator))
|
||||
? ("\\" + decimalFormatSymbols.getDecimalSeparator()) : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator)
|
||||
? "\\" + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("Numbers|Timestamps|Money\r\n" +
|
||||
"1234%1$s456|" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\r\n" +
|
||||
"1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "|£ 123%1$s45\r\n" +
|
||||
"1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "|¥ 123%1$s45\r\n" +
|
||||
"1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\r\n" +
|
||||
"1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "|£ 1%2$s023%1$s45\r\n" +
|
||||
"9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\r\n" +
|
||||
"9%1$s877E+08||\r\n" +
|
||||
"9%1$s8765E+08||\r\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomValueSeparatorWithEL() throws Exception {
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("csv.delimiter", "|");
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
|
||||
|
||||
testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String valueSeparator = testRunner.getProcessContext().getProperty(CSVUtils.VALUE_SEPARATOR).evaluateAttributeExpressions(ff).getValue();
|
||||
String decimalSeparator = (String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(valueSeparator))
|
||||
? ("\\" + decimalFormatSymbols.getDecimalSeparator()) : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(valueSeparator)
|
||||
? "\\" + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("Numbers|Timestamps|Money\n" +
|
||||
"1234%1$s456|" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123%1$s45\n" +
|
||||
"1234%1$s46|" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "|£ 123%1$s45\n" +
|
||||
"1234%1$s5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "|¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46|" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "|$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560|" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "|£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "|¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08||\n" +
|
||||
"9%1$s8765E+08||\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomQuoteCharWithEL() throws Exception {
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("csv.quote", "'");
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
|
||||
|
||||
testRunner.setProperty(CSVUtils.QUOTE_CHAR, "${csv.quote}");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_ALL);
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
String quoteCharValue = testRunner.getProcessContext().getProperty(CSVUtils.QUOTE_CHAR).evaluateAttributeExpressions(ff).getValue();
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
char decimalSeparator = decimalFormatSymbols.getDecimalSeparator();
|
||||
char groupingSeparator = decimalFormatSymbols.getGroupingSeparator();
|
||||
ff.assertContentEquals(("'Numbers','Timestamps','Money'\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s456", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("$ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s46", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("£ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1234%1$s5", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("¥ 123%1$s45", decimalSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1%2$s234%1$s46", decimalSeparator, groupingSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("$ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("1%2$s234%1$s4560", decimalSeparator, groupingSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("£ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s88E+08", decimalSeparator), quoteCharValue, true) + "," + quoteCharValue +
|
||||
DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + quoteCharValue + "," +
|
||||
addQuotingIfNeeded(String.format("¥ 1%2$s023%1$s45", decimalSeparator, groupingSeparator), quoteCharValue, true) + "\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s877E+08", decimalSeparator), quoteCharValue, true) + ",,\n" +
|
||||
addQuotingIfNeeded(String.format("9%1$s8765E+08", decimalSeparator), quoteCharValue, true) + ",,\n").replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomEscapeCharWithEL() throws Exception {
|
||||
Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("csv.escape", "^");
|
||||
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);
|
||||
|
||||
testRunner.setProperty(CSVUtils.ESCAPE_CHAR, "${csv.escape}");
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");
|
||||
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long rowsSheet = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(9, rowsSheet);
|
||||
|
||||
LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0);
|
||||
DecimalFormatSymbols decimalFormatSymbols = DecimalFormatSymbols.getInstance();
|
||||
String escapeCharValue = testRunner.getProcessContext().getProperty(CSVUtils.ESCAPE_CHAR).evaluateAttributeExpressions(ff).getValue();
|
||||
String decimalSeparator = String.valueOf(decimalFormatSymbols.getDecimalSeparator()).equals(",")
|
||||
? escapeCharValue + decimalFormatSymbols.getDecimalSeparator() : String.valueOf(decimalFormatSymbols.getDecimalSeparator());
|
||||
String groupingSeparator = String.valueOf(decimalFormatSymbols.getGroupingSeparator()).equals(",")
|
||||
? escapeCharValue + decimalFormatSymbols.getGroupingSeparator() : String.valueOf(decimalFormatSymbols.getGroupingSeparator());
|
||||
ff.assertContentEquals(String.format("Numbers,Timestamps,Money\n" +
|
||||
"1234%1$s456," + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + ",$ 123%1$s45\n" +
|
||||
"1234%1$s46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123%1$s45\n" +
|
||||
"1234%1$s5," + DateTimeFormatter.ofPattern(String.format("EEEE%1$s, MMMM dd%1$s, yyyy", escapeCharValue)).format(localDt) + ",¥ 123%1$s45\n" +
|
||||
"1%2$s234%1$s46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1%2$s023%1$s45\n" +
|
||||
"1%2$s234%1$s4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1%2$s023%1$s45\n" +
|
||||
"9%1$s88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1%2$s023%1$s45\n" +
|
||||
"9%1$s877E+08,,\n" +
|
||||
"9%1$s8765E+08,,\n", decimalSeparator, groupingSeparator).replace("E+", getExponentSeparator(decimalFormatSymbols)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that all sheets in the Excel document are exported.
|
||||
*
|
||||
* @throws Exception
|
||||
* Any exception thrown during execution.
|
||||
*/
|
||||
@Test
|
||||
public void testProcessAllSheets() throws Exception {
|
||||
|
||||
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(10, l);
|
||||
|
||||
testRunner.clearProvenanceEvents();
|
||||
testRunner.clearTransferState();
|
||||
|
||||
testRunner.enqueue(new File("src/test/resources/TwoSheets.xlsx").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 2);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(4, l);
|
||||
|
||||
ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(1);
|
||||
l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(3, l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that the manually specified sheet is exported from the Excel document.
|
||||
*
|
||||
* @throws Exception
|
||||
* Any exception thrown during execution.
|
||||
*/
|
||||
@Test
|
||||
public void testProcessASpecificSheetThatDoesExist() throws Exception {
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Scorecard");
|
||||
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(10, l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for a syntactically valid Excel XSSF document with a manually specified Excel sheet that does not exist.
|
||||
* In this scenario only the Original relationship should be invoked.
|
||||
*
|
||||
* @throws Exception
|
||||
* Any exception thrown during execution.
|
||||
*/
|
||||
@Test
|
||||
public void testNonExistantSpecifiedSheetName() throws Exception {
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "NopeIDoNotExist");
|
||||
testRunner.enqueue(new File("src/test/resources/CollegeScorecard.xlsx").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0); //We aren't expecting any output to success here because the sheet doesn't exist
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
assertFalse(testRunner.getLogger().getWarnMessages().isEmpty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that a sheet contains blank cells can be converted to a CSV without missing columns.
|
||||
*
|
||||
* @throws Exception
|
||||
* Any exception thrown during execution.
|
||||
*/
|
||||
@Test
|
||||
public void testProcessASheetWithBlankCells() throws Exception {
|
||||
|
||||
testRunner.setProperty(ConvertExcelToCSVProcessor.DESIRED_SHEETS, "Sheet1");
|
||||
testRunner.enqueue(new File("src/test/resources/with-blank-cells.xlsx").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);
|
||||
|
||||
MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);
|
||||
long l = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
|
||||
assertEquals(8, l);
|
||||
|
||||
ff.assertContentEquals(new File("src/test/resources/with-blank-cells.csv"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for graceful handling and error messaging of unsupported .XLS files.
|
||||
*/
|
||||
@Test
|
||||
public void testHandleUnsupportedXlsFile() throws Exception {
|
||||
|
||||
testRunner.enqueue(new File("src/test/resources/Unsupported.xls").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 0);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 1);
|
||||
|
||||
List<LogMessage> errorMessages = testRunner.getLogger().getErrorMessages();
|
||||
assertEquals(1, errorMessages.size());
|
||||
String messageText = errorMessages.get(0).getMsg();
|
||||
assertTrue(messageText.contains("Excel") && messageText.contains("OLE2"));
|
||||
}
|
||||
|
||||
private String addQuotingIfNeeded(String csvField) {
|
||||
return addQuotingIfNeeded(csvField, "\"", false);
|
||||
}
|
||||
|
||||
private String addQuotingIfNeeded(String csvField, String csvQuote, boolean force) {
|
||||
return csvField.contains(",") || force ? String.format("%2$s%1$s%2$s", csvField, csvQuote) : csvField;
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,32 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<configuration scan="true" scanPeriod="30 seconds">
    <!-- Single appender: every log event is written to the console. -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <!-- Layout: relative time, thread, level, logger name, message. -->
            <pattern>%-4r [%t] %-5p %c - %m%n</pattern>
        </encoder>
    </appender>

    <!-- Valid logging levels: TRACE, DEBUG, INFO, WARN, ERROR -->
    <!-- Loggers under org.apache.nifi are limited to WARN and above. -->
    <logger name="org.apache.nifi" level="WARN"/>

    <!-- All other loggers default to INFO and use the console appender. -->
    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
    </root>
</configuration>
|
||||
|
@ -1,8 +0,0 @@
|
||||
A,B,C,D
|
||||
A1,,,
|
||||
,B2,C2,
|
||||
,,C3,
|
||||
,,C4,D4
|
||||
A5,,C5,D5
|
||||
A6,B6,,D6
|
||||
A7,B7,C7,D7
|
|
Binary file not shown.
@ -28,7 +28,6 @@
|
||||
<poi.version>5.2.3</poi.version>
|
||||
</properties>
|
||||
<modules>
|
||||
<module>nifi-poi-processors</module>
|
||||
<module>nifi-poi-nar</module>
|
||||
<module>nifi-poi-services</module>
|
||||
</modules>
|
||||
|
Loading…
x
Reference in New Issue
Block a user