NIFI-5826 Fix back-slash escaping in Lexers

Adding unit test cases for escaped characters

Signed-off-by: Ed <edward.berezitsky@gmail.com>

This closes #3200
This commit is contained in:
Koji Kawamura 2018-12-05 15:03:21 +09:00 committed by Ed
parent 93efc2affc
commit ae67346648
4 changed files with 130 additions and 3 deletions

View File

@ -232,7 +232,7 @@ ESC
| '\\' { setText("\\\\"); }
| nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\')
{
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString());
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString());
}
)
;

View File

@ -162,7 +162,7 @@ ESC
| '\\' { setText("\\\\"); }
| nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\')
{
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString());
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString());
}
)
;

View File

@ -152,7 +152,7 @@ ESC
| '\\' { setText("\\\\"); }
| nextChar = ~('"' | '\'' | 'r' | 'n' | 't' | '\\')
{
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\\\").appendCodePoint(nextChar); setText(lBuf.toString());
StringBuilder lBuf = new StringBuilder(); lBuf.append("\\").appendCodePoint(nextChar); setText(lBuf.toString());
}
)
;

View File

@ -1026,6 +1026,133 @@ public class TestRecordPath {
assertEquals("Jxohn Dxoe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', '${hello}x')").evaluate(record).getSelectedFields().findFirst().get().getValue());
assertEquals("48ohn 48oe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue());
}
@Test
public void testReplaceRegexEscapedCharacters() {
    // Schema with two fields: an integer "id" and a string "name".
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("id", 48);
    // The record is built once; every case below re-puts "name" into this same map
    // before evaluating, so the assertions expect the record to see the new value.
    final Record record = new MapRecord(schema, fieldValues);

    // Case 1: whitespace in the subject is replaced by a new-line escape.
    fieldValues.put("name", "John Doe");
    final Object whitespaceToNewLine = RecordPath.compile("replaceRegex(/name, '[\\s]', '\\n')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing whitespace to new line", "John\nDoe", whitespaceToNewLine);

    // Case 2: a new-line escape used as the search pattern.
    fieldValues.put("name", "John\nDoe");
    final Object newLineToWhitespace = RecordPath.compile("replaceRegex(/name, '\\n', ' ')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing new line to whitespace", "John Doe", newLineToWhitespace);

    // Case 3: whitespace in the subject is replaced by a tab escape.
    fieldValues.put("name", "John Doe");
    final Object whitespaceToTab = RecordPath.compile("replaceRegex(/name, '[\\s]', '\\t')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing whitespace to tab", "John\tDoe", whitespaceToTab);

    // Case 4: a tab escape used as the search pattern.
    fieldValues.put("name", "John\tDoe");
    final Object tabToWhitespace = RecordPath.compile("replaceRegex(/name, '\\t', ' ')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing tab to whitespace", "John Doe", tabToWhitespace);
}
@Test
public void testReplaceRegexEscapedQuotes() {
    // Schema with two fields: an integer "id" and a string "name".
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("id", 48);
    // The record is built once; every case below re-puts "name" into this same map
    // before evaluating, so the assertions expect the record to see the new value.
    final Record record = new MapRecord(schema, fieldValues);

    // NOTE: In Java source a single back-slash has to be written as two; when the same
    // path is typed into the NiFi UI no such doubling is needed.
    // The first path below is equivalent to: replaceRegex(/name, '\'', '"')

    // Single-quoted arguments: escaped single quote as the pattern.
    fieldValues.put("name", "'John' 'Doe'");
    final Object quoteToDoubleQuote = RecordPath.compile("replaceRegex(/name, '\\'', '\"')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing quote to double-quote", "\"John\" \"Doe\"", quoteToDoubleQuote);

    // Single-quoted arguments: escaped single quote as the replacement.
    fieldValues.put("name", "\"John\" \"Doe\"");
    final Object doubleQuoteToQuote = RecordPath.compile("replaceRegex(/name, '\"', '\\'')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing double-quote to single-quote", "'John' 'Doe'", doubleQuoteToQuote);

    // Double-quoted arguments: escaped double quote as the replacement.
    fieldValues.put("name", "'John' 'Doe'");
    final Object quoteToDoubleQuoteWrapped = RecordPath.compile("replaceRegex(/name, \"'\", \"\\\"\")")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing quote to double-quote, the function arguments are wrapped by double-quote",
        "\"John\" \"Doe\"", quoteToDoubleQuoteWrapped);

    // Double-quoted arguments: escaped double quote as the pattern.
    fieldValues.put("name", "\"John\" \"Doe\"");
    final Object doubleQuoteToQuoteWrapped = RecordPath.compile("replaceRegex(/name, \"\\\"\", \"'\")")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing double-quote to single-quote, the function arguments are wrapped by double-quote",
        "'John' 'Doe'", doubleQuoteToQuoteWrapped);
}
@Test
public void testReplaceRegexEscapedBackSlashes() {
    // Schema with two fields: an integer "id" and a string "name".
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("id", 48);
    // The record is built once; every case below re-puts "name" into this same map
    // before evaluating, so the assertions expect the record to see the new value.
    final Record record = new MapRecord(schema, fieldValues);

    // NOTE: In Java source a single back-slash has to be written as two; when the same
    // path is typed into the NiFi UI no such doubling is needed.
    // The first path below is equivalent to: replaceRegex(/name, '\\', '/')

    // Escaped back-slash as the search pattern.
    fieldValues.put("name", "John\\Doe");
    final Object backSlashToForwardSlash = RecordPath.compile("replaceRegex(/name, '\\\\', '/')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing a back-slash to forward-slash", "John/Doe", backSlashToForwardSlash);

    // Escaped back-slash as the replacement.
    fieldValues.put("name", "John/Doe");
    final Object forwardSlashToBackSlash = RecordPath.compile("replaceRegex(/name, '/', '\\\\')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Replacing a forward-slash to back-slash", "John\\Doe", forwardSlashToBackSlash);
}
@Test
public void testReplaceRegexEscapedBrackets() {
    // Schema with two fields: an integer "id" and a string "name".
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
    schemaFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("id", 48);
    // The record is built once; every case below re-puts "name" into this same map
    // before evaluating, so the assertions expect the record to see the new value.
    final Record record = new MapRecord(schema, fieldValues);

    // Square brackets escaped in the pattern; two nested replaceRegex calls swap each
    // bracket for its round counterpart.
    fieldValues.put("name", "J[o]hn Do[e]");
    final Object squareToRound = RecordPath.compile("replaceRegex(replaceRegex(/name, '\\[', '('), '\\]', ')')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Square brackets can be escaped with back-slash", "J(o)hn Do(e)", squareToRound);

    // Round brackets escaped in the pattern; the reverse substitution.
    fieldValues.put("name", "J(o)hn Do(e)");
    final Object roundToSquare = RecordPath.compile("replaceRegex(replaceRegex(/name, '\\(', '['), '\\)', ']')")
        .evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("Brackets can be escaped with back-slash", "J[o]hn Do[e]", roundToSquare);
}
@Test