diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
index bd9675356c..bdcffaef90 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@@ -99,6 +99,7 @@
src/test/resources/csv/extra-white-space.csv
src/test/resources/csv/multi-bank-account.csv
src/test/resources/csv/single-bank-account.csv
+ src/test/resources/csv/multi-bank-account_escapedchar.csv
src/test/resources/grok/error-with-stack-trace.log
src/test/resources/grok/nifi-log-sample-multiline-with-stacktrace.log
src/test/resources/grok/nifi-log-sample.log
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
index 3f5dcf6a5b..17152aa5fc 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVUtils.java
@@ -19,6 +19,7 @@ package org.apache.nifi.csv;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.QuoteMode;
+import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
@@ -48,7 +49,7 @@ public class CSVUtils {
static final PropertyDescriptor VALUE_SEPARATOR = new PropertyDescriptor.Builder()
.name("Value Separator")
.description("The character that is used to separate values/fields in a CSV Record")
- .addValidator(new SingleCharacterValidator())
+ .addValidator(CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR)
.expressionLanguageSupported(false)
.defaultValue(",")
.required(true)
@@ -56,7 +57,7 @@ public class CSVUtils {
static final PropertyDescriptor QUOTE_CHAR = new PropertyDescriptor.Builder()
.name("Quote Character")
.description("The character that is used to quote values so that escape characters do not have to be used")
- .addValidator(new SingleCharacterValidator())
+ .addValidator(new CSVValidators.SingleCharacterValidator())
.expressionLanguageSupported(false)
.defaultValue("\"")
.required(true)
@@ -89,14 +90,14 @@ public class CSVUtils {
static final PropertyDescriptor COMMENT_MARKER = new PropertyDescriptor.Builder()
.name("Comment Marker")
.description("The character that is used to denote the start of a comment. Any line that begins with this comment will be ignored.")
- .addValidator(new SingleCharacterValidator())
+ .addValidator(new CSVValidators.SingleCharacterValidator())
.expressionLanguageSupported(false)
.required(false)
.build();
static final PropertyDescriptor ESCAPE_CHAR = new PropertyDescriptor.Builder()
.name("Escape Character")
.description("The character that is used to escape characters that would otherwise have a specific meaning to the CSV Parser.")
- .addValidator(new SingleCharacterValidator())
+ .addValidator(new CSVValidators.SingleCharacterValidator())
.expressionLanguageSupported(false)
.defaultValue("\\")
.required(true)
@@ -179,12 +180,16 @@ public class CSVUtils {
}
}
+ private static char getUnescapedChar(final ConfigurationContext context, final PropertyDescriptor property) {
+ return StringEscapeUtils.unescapeJava(context.getProperty(property).getValue()).charAt(0);
+ }
+
private static char getChar(final ConfigurationContext context, final PropertyDescriptor property) {
return CSVUtils.unescape(context.getProperty(property).getValue()).charAt(0);
}
private static CSVFormat buildCustomFormat(final ConfigurationContext context) {
- final char valueSeparator = getChar(context, VALUE_SEPARATOR);
+ final char valueSeparator = getUnescapedChar(context, VALUE_SEPARATOR);
CSVFormat format = CSVFormat.newFormat(valueSeparator)
.withAllowMissingColumnNames()
.withIgnoreEmptyLines();
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java
new file mode 100644
index 0000000000..5979407c9f
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVValidators.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.csv;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.components.Validator;
+
+import java.util.HashSet;
+import java.util.Set;
+
+public class CSVValidators {
+
+ public static class SingleCharacterValidator implements Validator {
+ private static final Set illegalChars = new HashSet<>();
+
+ static {
+ illegalChars.add("\r");
+ illegalChars.add("\n");
+ }
+
+ @Override
+ public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
+
+ if (input == null) {
+ return new ValidationResult.Builder()
+ .input(input)
+ .subject(subject)
+ .valid(false)
+ .explanation("Input is null for this property")
+ .build();
+ }
+
+ final String unescaped = CSVUtils.unescape(input);
+ if (unescaped.length() != 1) {
+ return new ValidationResult.Builder()
+ .input(input)
+ .subject(subject)
+ .valid(false)
+ .explanation("Value must be exactly 1 character but was " + input.length() + " in length")
+ .build();
+ }
+
+ if (illegalChars.contains(unescaped)) {
+ return new ValidationResult.Builder()
+ .input(input)
+ .subject(subject)
+ .valid(false)
+ .explanation(input + " is not a valid character for this property")
+ .build();
+ }
+
+ return new ValidationResult.Builder()
+ .input(input)
+ .subject(subject)
+ .valid(true)
+ .build();
+ }
+
+ }
+
+ public static final Validator UNESCAPED_SINGLE_CHAR_VALIDATOR = new Validator() {
+ @Override
+ public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
+
+ if (input == null) {
+ return new ValidationResult.Builder()
+ .input(input)
+ .subject(subject)
+ .valid(false)
+ .explanation("Input is null for this property")
+ .build();
+ }
+
+ String unescapeString = unescapeString(input);
+
+ return new ValidationResult.Builder()
+ .subject(subject)
+ .input(unescapeString)
+ .explanation("Only non-null single characters are supported")
+ .valid((unescapeString.length() == 1 && unescapeString.charAt(0) != 0) || context.isExpressionLanguagePresent(input))
+ .build();
+ }
+
+ private String unescapeString(String input) {
+ if (input != null && input.length() > 1) {
+ input = StringEscapeUtils.unescapeJava(input);
+ }
+ return input;
+ }
+ };
+
+}
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/SingleCharacterValidator.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/SingleCharacterValidator.java
deleted file mode 100644
index b24dea9dcb..0000000000
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/SingleCharacterValidator.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nifi.csv;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.nifi.components.ValidationContext;
-import org.apache.nifi.components.ValidationResult;
-import org.apache.nifi.components.Validator;
-
-public class SingleCharacterValidator implements Validator {
- private static final Set illegalChars = new HashSet<>();
- static {
- illegalChars.add("\r");
- illegalChars.add("\n");
- }
-
- @Override
- public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
- final String unescaped = CSVUtils.unescape(input);
- if (unescaped.length() != 1) {
- return new ValidationResult.Builder()
- .input(input)
- .subject(subject)
- .valid(false)
- .explanation("Value must be exactly 1 character but was " + input.length() + " in length")
- .build();
- }
-
- if (illegalChars.contains(input)) {
- return new ValidationResult.Builder()
- .input(input)
- .subject(subject)
- .valid(false)
- .explanation(input + " is not a valid character for this property")
- .build();
- }
-
- return new ValidationResult.Builder()
- .input(input)
- .subject(subject)
- .valid(true)
- .build();
- }
-
-}
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
index 3533a43406..576132fb9f 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java
@@ -33,6 +33,7 @@ import java.util.List;
import java.util.TimeZone;
import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.SimpleRecordSchema;
@@ -57,7 +58,7 @@ public class TestCSVRecordReader {
return fields;
}
- private CSVRecordReader createReader(final InputStream in, final RecordSchema schema) throws IOException {
+ private CSVRecordReader createReader(final InputStream in, final RecordSchema schema, CSVFormat format) throws IOException {
return new CSVRecordReader(in, Mockito.mock(ComponentLog.class), schema, format, true, false,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat());
}
@@ -93,7 +94,7 @@ public class TestCSVRecordReader {
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/single-bank-account.csv"));
- final CSVRecordReader reader = createReader(fis, schema)) {
+ final CSVRecordReader reader = createReader(fis, schema, format)) {
final Object[] record = reader.nextRecord().getValues();
final Object[] expectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
@@ -111,7 +112,7 @@ public class TestCSVRecordReader {
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/multi-bank-account.csv"));
- final CSVRecordReader reader = createReader(fis, schema)) {
+ final CSVRecordReader reader = createReader(fis, schema, format)) {
final Object[] firstRecord = reader.nextRecord().getValues();
final Object[] firstExpectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
@@ -133,7 +134,7 @@ public class TestCSVRecordReader {
final RecordSchema schema = new SimpleRecordSchema(fields);
try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/extra-white-space.csv"));
- final CSVRecordReader reader = createReader(fis, schema)) {
+ final CSVRecordReader reader = createReader(fis, schema, format)) {
final Object[] firstRecord = reader.nextRecord().getValues();
final Object[] firstExpectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
@@ -160,7 +161,7 @@ public class TestCSVRecordReader {
final byte[] inputData = csvData.getBytes();
try (final InputStream bais = new ByteArrayInputStream(inputData);
- final CSVRecordReader reader = createReader(bais, schema)) {
+ final CSVRecordReader reader = createReader(bais, schema, format)) {
final Record record = reader.nextRecord();
assertNotNull(record);
@@ -190,7 +191,7 @@ public class TestCSVRecordReader {
// test nextRecord does not contain a 'continent' field
try (final InputStream bais = new ByteArrayInputStream(inputData);
- final CSVRecordReader reader = createReader(bais, schema)) {
+ final CSVRecordReader reader = createReader(bais, schema, format)) {
final Record record = reader.nextRecord();
assertNotNull(record);
@@ -210,7 +211,7 @@ public class TestCSVRecordReader {
// test nextRawRecord does contain 'continent' field
try (final InputStream bais = new ByteArrayInputStream(inputData);
- final CSVRecordReader reader = createReader(bais, schema)) {
+ final CSVRecordReader reader = createReader(bais, schema, format)) {
final Record record = reader.nextRecord(false, false);
assertNotNull(record);
@@ -241,7 +242,7 @@ public class TestCSVRecordReader {
final byte[] inputData = csvData.getBytes();
try (final InputStream bais = new ByteArrayInputStream(inputData);
- final CSVRecordReader reader = createReader(bais, schema)) {
+ final CSVRecordReader reader = createReader(bais, schema, format)) {
final Record record = reader.nextRecord();
assertNotNull(record);
@@ -304,7 +305,7 @@ public class TestCSVRecordReader {
// test nextRecord does not contain a 'continent' field
try (final InputStream bais = new ByteArrayInputStream(inputData);
- final CSVRecordReader reader = createReader(bais, schema)) {
+ final CSVRecordReader reader = createReader(bais, schema, format)) {
final Record record = reader.nextRecord(false, false);
assertNotNull(record);
@@ -322,4 +323,30 @@ public class TestCSVRecordReader {
assertNull(reader.nextRecord(false, false));
}
}
+
+ @Test
+ public void testMultipleRecordsEscapedWithSpecialChar() throws IOException, MalformedRecordException {
+
+ char delimiter = StringEscapeUtils.unescapeJava("\u0001").charAt(0);
+
+ final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"').withDelimiter(delimiter);
+ final List fields = getDefaultFields();
+ fields.replaceAll(f -> f.getFieldName().equals("balance") ? new RecordField("balance", doubleDataType) : f);
+
+ final RecordSchema schema = new SimpleRecordSchema(fields);
+
+ try (final InputStream fis = new FileInputStream(new File("src/test/resources/csv/multi-bank-account_escapedchar.csv"));
+ final CSVRecordReader reader = createReader(fis, schema, format)) {
+
+ final Object[] firstRecord = reader.nextRecord().getValues();
+ final Object[] firstExpectedValues = new Object[] {"1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA"};
+ Assert.assertArrayEquals(firstExpectedValues, firstRecord);
+
+ final Object[] secondRecord = reader.nextRecord().getValues();
+ final Object[] secondExpectedValues = new Object[] {"2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA"};
+ Assert.assertArrayEquals(secondExpectedValues, secondRecord);
+
+ assertNull(reader.nextRecord());
+ }
+ }
}
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java
new file mode 100644
index 0000000000..5c7c9e278a
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.csv;
+
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.components.Validator;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+
+public class TestCSVValidators {
+
+ /*** SingleCharValidator **/
+ @Test
+ public void testSingleCharNullValue() {
+
+ CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator();
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("EscapeChar", null, mockContext);
+ assertEquals("Input is null for this property", result.getExplanation());
+ assertFalse(result.isValid());
+ }
+
+ @Test
+ public void testSingleCharTab() {
+ CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator();
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("EscapeChar", "\\t", mockContext);
+ assertTrue(result.isValid());
+ }
+
+ @Test
+ public void testSingleCharIllegalChar() {
+ CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator();
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("EscapeChar", "\\r", mockContext);
+ assertEquals("\\r is not a valid character for this property", result.getExplanation());
+ assertFalse(result.isValid());
+ }
+
+ @Test
+ public void testSingleCharGoodChar() {
+ CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator();
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("EscapeChar", "'", mockContext);
+ assertTrue(result.isValid());
+ }
+
+
+ /*** Unescaped SingleCharValidator **/
+
+ @Test
+ public void testUnEscapedSingleCharNullValue() {
+ Validator validator = CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR;
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("Delimiter", null, mockContext);
+ assertEquals("Input is null for this property", result.getExplanation());
+ assertFalse(result.isValid());
+
+ }
+
+ @Test
+ public void testUnescapedSingleCharUnicodeChar() {
+ Validator validator = CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR;
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("Delimiter", "\\u0001", mockContext);
+ assertTrue(result.isValid());
+ }
+
+ @Test
+ public void testUnescapedSingleCharGoodChar() {
+ Validator validator = CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR;
+ ValidationContext mockContext = Mockito.mock(ValidationContext.class);
+ ValidationResult result = validator.validate("Delimiter", ",", mockContext);
+ assertTrue(result.isValid());
+ }
+
+}
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/csv/multi-bank-account_escapedchar.csv b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/csv/multi-bank-account_escapedchar.csv
new file mode 100644
index 0000000000..bb18bc8eb1
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/csv/multi-bank-account_escapedchar.csv
@@ -0,0 +1,3 @@
+idnamebalanceaddresscitystatezipCodecountry
+1John Doe"4750.89""123 My Street"My CityMS11111USA
+2Jane Doe4820.09321 Your StreetYour CityNY33333USA
\ No newline at end of file