NIFI-3837: Be more specific when escaping back references in evaluated expressions: escape only a '$' that is followed by a digit rather than every '$'.

Signed-off-by: Matt Burgess <mattyb149@apache.org>

fixed typo in comment

This closes #1781
This commit is contained in:
Matt Gilman 2017-05-10 16:15:10 -04:00 committed by Matt Burgess
parent 0b0ac196ea
commit dd0306cce7
2 changed files with 62 additions and 30 deletions

View File

@ -16,25 +16,6 @@
*/
package org.apache.nifi.processors.standard;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
@ -67,6 +48,25 @@ import org.apache.nifi.processors.standard.util.NLKBufferedReader;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StopWatch;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@EventDriven
@SideEffectFree
@SupportsBatching
@ -86,7 +86,7 @@ public class ReplaceText extends AbstractProcessor {
// Human-readable string constants; their consumers are outside this excerpt.
public static final String regexReplaceValue = "Regex Replace";
public static final String literalReplaceValue = "Literal Replace";
public static final String alwaysReplace = "Always Replace";
// Matches a '$' followed by one or more digits anywhere in the input; group 1 captures the digits.
private static final Pattern backReferencePattern = Pattern.compile("\\$(\\d+)");
// Matches a '$' followed by digits only when the character immediately before the '$' is not a backslash;
// group 1 captures the digits. The overall match also includes that preceding character.
// NOTE(review): because [^\\] must consume one character, a reference at the very start of the input
// (for example the default "$1") is not matched by this pattern - confirm this is intended; a negative
// lookbehind such as (?<!\\)\$(\d+) would also cover that position.
private static final Pattern unescapedBackReferencePattern = Pattern.compile("[^\\\\]\\$(\\d+)");
// Default search expression: DOTALL mode with a single capturing group spanning the whole input.
private static final String DEFAULT_REGEX = "(?s)(^.*$)";
// Default replacement: a back reference to capturing group 1.
private static final String DEFAULT_REPLACEMENT_VALUE = "$1";
@ -307,7 +307,7 @@ public class ReplaceText extends AbstractProcessor {
}
String value = unescaped;
final Matcher backRefMatcher = backReferencePattern.matcher(value);
final Matcher backRefMatcher = unescapedBackReferencePattern.matcher(value); // consider unescaped back references
while (backRefMatcher.find()) {
final String backRefNum = backRefMatcher.group(1);
if (backRefNum.startsWith("0")) {
@ -494,10 +494,12 @@ public class ReplaceText extends AbstractProcessor {
private final int numCapturingGroups;
private final Map<String, String> additionalAttrs;
// Back references are not supported in the evaluated expression, so attribute values are passed through
// this decorator, which backslash-escapes every '$' that is directly followed by a digit. A '$' that is
// not followed by a digit is left untouched.
private static final AttributeValueDecorator escapeBackRefDecorator = new AttributeValueDecorator() {
    /**
     * Escapes numeric back references contained in an attribute value.
     *
     * @param attributeValue the raw attribute value; must not be null
     * @return the value with each '$' that precedes a digit rewritten as '\$'
     */
    @Override
    public String decorate(final String attributeValue) {
        // The pattern matches '$' plus exactly one digit: the reluctant \d+? stops after the first digit,
        // which is enough to place the backslash in front of the '$'. In the replacement, "\\\\" emits a
        // literal backslash and "$1" re-emits the matched text, so "$12" becomes "\$12".
        return attributeValue.replaceAll("(\\$\\d+?)", "\\\\$1");
    }
};

View File

@ -16,6 +16,14 @@
*/
package org.apache.nifi.processors.standard;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@ -26,14 +34,6 @@ import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
public class TestReplaceText {
@Rule
@ -1194,6 +1194,36 @@ public class TestReplaceText {
runner.assertValid();
}
/**
 * Runs ReplaceText in regex-replace / entire-text mode with the replacement {@code ${'$1':toUpper()}}
 * and checks that content containing '$' characters, including '$1' and '$2' sequences, comes out
 * upper-cased with those characters intact.
 */
@Test
public void testBackReferenceEscapeWithRegexReplaceUsingEL() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${'$1':toUpper()}");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.REGEX_REPLACE);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.ENTIRE_TEXT);
    testRunner.assertValid();

    // Each row is { incoming content, expected outgoing content }.
    final String[][] cases = {
        {"wo$rd", "WO$RD"},
        {"wo$1rd", "WO$1RD"},
        {"wo$1r$2d", "WO$1R$2D"},
    };

    for (int i = 0; i < cases.length; i++) {
        testRunner.enqueue(cases[i][0].getBytes());
        testRunner.run();

        // The runner is reused, so the success relationship accumulates one flow file per iteration.
        testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, i + 1);
        final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(i);
        result.assertContentEquals(cases[i][1]);
    }
}
/**
 * Convenience overload: converts the given file to its path via {@code file.toPath()} and delegates
 * to the {@code translateNewLines} overload that accepts that path.
 *
 * @param file the file whose path is handed to the delegated overload
 * @return the string produced by the delegated overload
 * @throws IOException declared here; presumably propagated from the delegated overload
 */
private String translateNewLines(final File file) throws IOException {
return translateNewLines(file.toPath());
}