NIFI-5017 - ConvertExcelToCSVProcessor - EL support for rows and columns to skip

Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>

This closes #2597.
This commit is contained in:
Arun A K 2018-03-29 16:21:05 +00:00 committed by Pierre Villard
parent c4dcde837e
commit 5d90d14f17
2 changed files with 66 additions and 6 deletions

View File

@ -108,6 +108,7 @@ public class ConvertExcelToCSVProcessor
+ "Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.")
.required(true)
.defaultValue("0")
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
.build();
@ -117,6 +118,7 @@ public class ConvertExcelToCSVProcessor
.description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. "
+ "Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.")
.required(false)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.build();
@ -165,9 +167,9 @@ public class ConvertExcelToCSVProcessor
descriptors.add(CSVUtils.NULL_STRING);
descriptors.add(CSVUtils.TRIM_FIELDS);
descriptors.add(new PropertyDescriptor.Builder()
.fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
.defaultValue(CSVUtils.QUOTE_NONE.getValue())
.build());
.fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
.defaultValue(CSVUtils.QUOTE_NONE.getValue())
.build());
descriptors.add(CSVUtils.RECORD_SEPARATOR);
descriptors.add(CSVUtils.TRAILING_DELIMITER);
this.descriptors = Collections.unmodifiableList(descriptors);
@ -196,15 +198,15 @@ public class ConvertExcelToCSVProcessor
return;
}
final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions().getValue();
final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
final CSVFormat csvFormat = CSVUtils.createCSVFormat(context);
//Switch to 0 based index
final int firstRow = context.getProperty(ROWS_TO_SKIP).asInteger() - 1;
final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1;
final String[] sColumnsToSkip = StringUtils
.split(context.getProperty(COLUMNS_TO_SKIP).getValue(), ",");
.split(context.getProperty(COLUMNS_TO_SKIP).evaluateAttributeExpressions(flowFile).getValue(), ",");
final List<Integer> columnsToSkip = new ArrayList<>();

View File

@ -159,6 +159,34 @@ public class ConvertExcelToCSVProcessorTest {
"9.8765E+08,,\n");
}
/**
 * Verifies that the rows-to-skip property accepts Expression Language that is
 * evaluated against the attributes of the incoming FlowFile.
 *
 * <p>The FlowFile carries {@code rowsToSkip=2} and the property is set to
 * {@code ${rowsToSkip}}. The test expects one FlowFile on SUCCESS, one on ORIGINAL,
 * none on FAILURE, a ROW_NUM attribute of 7 and the exact CSV content asserted below.
 */
@Test
public void testSkipRowsWithEL() throws Exception {
    // The attribute referenced by the Expression Language in ROWS_TO_SKIP.
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("rowsToSkip", "2");
    testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);

    testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "${rowsToSkip}");
    testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");

    testRunner.run();

    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);

    final MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);

    // Parse to a primitive instead of using the deprecated Long(String) constructor,
    // and compare numerically so a failure reports the actual count.
    final long rowCount = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
    assertEquals("Row count does not match expected value.", 7L, rowCount);

    ff.assertContentEquals("1234.46,12:00:00 PM,£ 123.45\n" +
            "1234.5,Sunday\\, January 01\\, 2017,¥ 123.45\n" +
            "1\\,234.46,1/1/17 12:00,$ 1\\,023.45\n" +
            "1\\,234.4560,12:00 PM,£ 1\\,023.45\n" +
            "9.88E+08,2017/01/01/ 12:00,¥ 1\\,023.45\n" +
            "9.877E+08,,\n" +
            "9.8765E+08,,\n");
}
@Test
public void testSkipColumns() throws Exception {
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());
@ -187,6 +215,36 @@ public class ConvertExcelToCSVProcessorTest {
"9.8765E+08,\n");
}
/**
 * Verifies that the columns-to-skip property accepts Expression Language that is
 * evaluated against the attributes of the incoming FlowFile.
 *
 * <p>The FlowFile carries {@code columnsToSkip=2} and the property is set to
 * {@code ${columnsToSkip}}. The test expects one FlowFile on SUCCESS, one on ORIGINAL,
 * none on FAILURE, a ROW_NUM attribute of 9 and the exact CSV content asserted below.
 */
@Test
public void testSkipColumnsWithEL() throws Exception {
    // The attribute referenced by the Expression Language in COLUMNS_TO_SKIP.
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("columnsToSkip", "2");
    testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes);

    testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "${columnsToSkip}");
    testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true");

    testRunner.run();

    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1);
    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1);
    testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0);

    final MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0);

    // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == 9);
    // Long.parseLong avoids the deprecated Long(String) constructor.
    final long rowCount = Long.parseLong(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM));
    assertEquals("Row count does not match expected value.", 9L, rowCount);

    ff.assertContentEquals("Numbers,Money\n" +
            "1234.456,$ 123.45\n" +
            "1234.46,£ 123.45\n" +
            "1234.5,¥ 123.45\n" +
            "1\\,234.46,$ 1\\,023.45\n" +
            "1\\,234.4560,£ 1\\,023.45\n" +
            "9.88E+08,¥ 1\\,023.45\n" +
            "9.877E+08,\n" +
            "9.8765E+08,\n");
}
@Test
public void testCustomDelimiters() throws Exception {
testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath());