From ae67346648953fe8c3fc218f5f00d25554b9cffd Mon Sep 17 00:00:00 2001 From: Koji Kawamura Date: Wed, 5 Dec 2018 15:03:21 +0900 Subject: [PATCH] NIFI-5826 Fix back-slash escaping at Lexers Adding unit test cases for escaped characters Signed-off-by: Ed This closes #3200 --- .../language/antlr/AttributeExpressionLexer.g | 2 +- .../nifi/hl7/query/antlr/HL7QueryLexer.g | 2 +- .../apache/nifi/record/path/RecordPathLexer.g | 2 +- .../nifi/record/path/TestRecordPath.java | 127 ++++++++++++++++++ 4 files changed, 130 insertions(+), 3 deletions(-) diff --git a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g index 6c0bcff19a..fb3958c0ad 100644 --- a/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g +++ b/nifi-commons/nifi-expression-language/src/main/antlr3/org/apache/nifi/attribute/expression/language/antlr/AttributeExpressionLexer.g @@ -232,7 +232,7 @@ ESC | '\\' { setText("\\\\"); } | nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\') { - StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString()); + StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString()); } ) ; diff --git a/nifi-commons/nifi-hl7-query-language/src/main/antlr3/org/apache/nifi/hl7/query/antlr/HL7QueryLexer.g b/nifi-commons/nifi-hl7-query-language/src/main/antlr3/org/apache/nifi/hl7/query/antlr/HL7QueryLexer.g index 478028b9f7..ab9d355107 100644 --- a/nifi-commons/nifi-hl7-query-language/src/main/antlr3/org/apache/nifi/hl7/query/antlr/HL7QueryLexer.g +++ b/nifi-commons/nifi-hl7-query-language/src/main/antlr3/org/apache/nifi/hl7/query/antlr/HL7QueryLexer.g @@ -162,7 +162,7 @@ ESC | '\\' { 
setText("\\\\"); } | nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\') { - StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString()); + StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString()); } ) ; diff --git a/nifi-commons/nifi-record-path/src/main/antlr3/org/apache/nifi/record/path/RecordPathLexer.g b/nifi-commons/nifi-record-path/src/main/antlr3/org/apache/nifi/record/path/RecordPathLexer.g index cd466f7b6a..b6a838004b 100644 --- a/nifi-commons/nifi-record-path/src/main/antlr3/org/apache/nifi/record/path/RecordPathLexer.g +++ b/nifi-commons/nifi-record-path/src/main/antlr3/org/apache/nifi/record/path/RecordPathLexer.g @@ -152,7 +152,7 @@ ESC | '\\' { setText("\\\\"); } | nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\') { - StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString()); + StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString()); } ) ; diff --git a/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java b/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java index 67c14e6475..881fb6412c 100644 --- a/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java +++ b/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java @@ -1026,6 +1026,133 @@ public class TestRecordPath { assertEquals("Jxohn Dxoe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', '${hello}x')").evaluate(record).getSelectedFields().findFirst().get().getValue()); assertEquals("48ohn 48oe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue()); + + } + + @Test + public void testReplaceRegexEscapedCharacters() { + final List<RecordField> fields = new ArrayList<>(); + 
fields.add(new RecordField("id", RecordFieldType.INT.getDataType())); + fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); + + final RecordSchema schema = new SimpleRecordSchema(fields); + + final Map<String, Object> values = new HashMap<>(); + values.put("id", 48); + final Record record = new MapRecord(schema, values); + + // Special character cases + values.put("name", "John Doe"); + assertEquals("Replacing whitespace to new line", + "John\nDoe", RecordPath.compile("replaceRegex(/name, '[\\s]', '\\n')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "John\nDoe"); + assertEquals("Replacing new line to whitespace", + "John Doe", RecordPath.compile("replaceRegex(/name, '\\n', ' ')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "John Doe"); + assertEquals("Replacing whitespace to tab", + "John\tDoe", RecordPath.compile("replaceRegex(/name, '[\\s]', '\\t')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "John\tDoe"); + assertEquals("Replacing tab to whitespace", + "John Doe", RecordPath.compile("replaceRegex(/name, '\\t', ' ')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + } + + @Test + public void testReplaceRegexEscapedQuotes() { + + final List<RecordField> fields = new ArrayList<>(); + fields.add(new RecordField("id", RecordFieldType.INT.getDataType())); + fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); + + final RecordSchema schema = new SimpleRecordSchema(fields); + + final Map<String, Object> values = new HashMap<>(); + values.put("id", 48); + final Record record = new MapRecord(schema, values); + + // Quotes + // NOTE: In Java code, a single back-slash needs to be escaped with another back-slash, but this is not needed in the NiFi UI. 
+ // The test record path is equivalent to replaceRegex(/name, '\'', '"') + values.put("name", "'John' 'Doe'"); + assertEquals("Replacing quote to double-quote", + "\"John\" \"Doe\"", RecordPath.compile("replaceRegex(/name, '\\'', '\"')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "\"John\" \"Doe\""); + assertEquals("Replacing double-quote to single-quote", + "'John' 'Doe'", RecordPath.compile("replaceRegex(/name, '\"', '\\'')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "'John' 'Doe'"); + assertEquals("Replacing quote to double-quote, the function arguments are wrapped by double-quote", + "\"John\" \"Doe\"", RecordPath.compile("replaceRegex(/name, \"'\", \"\\\"\")") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "\"John\" \"Doe\""); + assertEquals("Replacing double-quote to single-quote, the function arguments are wrapped by double-quote", + "'John' 'Doe'", RecordPath.compile("replaceRegex(/name, \"\\\"\", \"'\")") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + } + + @Test + public void testReplaceRegexEscapedBackSlashes() { + + final List<RecordField> fields = new ArrayList<>(); + fields.add(new RecordField("id", RecordFieldType.INT.getDataType())); + fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); + + final RecordSchema schema = new SimpleRecordSchema(fields); + + final Map<String, Object> values = new HashMap<>(); + values.put("id", 48); + final Record record = new MapRecord(schema, values); + + // Back-slash + // NOTE: In Java code, a single back-slash needs to be escaped with another back-slash, but this is not needed in the NiFi UI. 
+ // The test record path is equivalent to replaceRegex(/name, '\\', '/') + values.put("name", "John\\Doe"); + assertEquals("Replacing a back-slash to forward-slash", + "John/Doe", RecordPath.compile("replaceRegex(/name, '\\\\', '/')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "John/Doe"); + assertEquals("Replacing a forward-slash to back-slash", + "John\\Doe", RecordPath.compile("replaceRegex(/name, '/', '\\\\')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + } + + @Test + public void testReplaceRegexEscapedBrackets() { + + final List<RecordField> fields = new ArrayList<>(); + fields.add(new RecordField("id", RecordFieldType.INT.getDataType())); + fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); + + final RecordSchema schema = new SimpleRecordSchema(fields); + + final Map<String, Object> values = new HashMap<>(); + values.put("id", 48); + final Record record = new MapRecord(schema, values); + + // Brackets + values.put("name", "J[o]hn Do[e]"); + assertEquals("Square brackets can be escaped with back-slash", + "J(o)hn Do(e)", RecordPath.compile("replaceRegex(replaceRegex(/name, '\\[', '('), '\\]', ')')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); + + values.put("name", "J(o)hn Do(e)"); + assertEquals("Brackets can be escaped with back-slash", + "J[o]hn Do[e]", RecordPath.compile("replaceRegex(replaceRegex(/name, '\\(', '['), '\\)', ']')") + .evaluate(record).getSelectedFields().findFirst().get().getValue()); } @Test