diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml index b0b3afabfc..4ee382fb8c 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml @@ -190,15 +190,12 @@ language governing permissions and limitations under the License. --> derby test - com.h2database h2 1.4.187 test - - @@ -251,6 +248,8 @@ language governing permissions and limitations under the License. --> src/test/resources/TestReplaceTextLineByLine/cu_Po.txt src/test/resources/TestReplaceTextLineByLine/food.txt src/test/resources/TestReplaceTextLineByLine/testFile.txt + src/test/resources/TestReplaceTextLineByLine/AppendLineByLineTest.txt + src/test/resources/TestReplaceTextLineByLine/PrependLineByLineTest.txt src/test/resources/TestReplaceTextWithMapping/color-fruit-backreference-mapping.txt src/test/resources/TestReplaceTextWithMapping/color-fruit-blank-mapping.txt src/test/resources/TestReplaceTextWithMapping/color-fruit-escaped-dollar-mapping.txt diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java index 98a56bf100..fb51d45381 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java @@ -35,10 +35,12 @@ import java.util.regex.Pattern; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.commons.io.IOUtils; import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.AllowableValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.Validator; import org.apache.nifi.expression.AttributeValueDecorator; @@ -64,84 +66,111 @@ import org.apache.nifi.util.StopWatch; @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"Text", "Regular Expression", "Update", "Change", "Replace", "Modify", "Regex"}) -@CapabilityDescription("Updates the content of a FlowFile by evaluating a Regular Expression against it and replacing the section of " - + "the content that matches the Regular Expression with some alternate value.") +@CapabilityDescription("Updates the content of a FlowFile by evaluating a Regular Expression (regex) against it and replacing the section of " + + "the content that matches the Regular Expression with some alternate value.") public class ReplaceText extends AbstractProcessor { - //Constants + // Constants public static final String LINE_BY_LINE = "Line-by-Line"; public static final String ENTIRE_TEXT = "Entire text"; + public static final String prependValue = "Prepend"; + public static final String appendValue = "Append"; + public static final String regexReplaceValue = "Regex Replace"; + public static final String literalReplaceValue = "Literal Replace"; private final Pattern backReferencePattern = Pattern.compile("\\$(\\d+)"); - private static final byte[] ZERO_BYTE_BUFFER = new byte[0]; private static final String DEFAULT_REGEX = "(?s:^.*$)"; private static final String DEFAULT_REPLACEMENT_VALUE = "$1"; - // Properties - public static final PropertyDescriptor REGEX = new PropertyDescriptor.Builder() - .name("Regular Expression") - .description("The Regular Expression to search for in the FlowFile content") - .required(true) - .addValidator(StandardValidators.createRegexValidator(0, Integer.MAX_VALUE, true)) - .expressionLanguageSupported(true) - .defaultValue(DEFAULT_REGEX) - .build(); + // Prepend and Append will just insert the replacement value at the beginning or end + // Properties PREPEND, APPEND, REGEX_REPLACE, LITERAL_REPLACE + static final AllowableValue PREPEND = new AllowableValue(prependValue, prependValue, + "Insert the Replacement Value at the beginning of the FlowFile or the beginning of each line (depending on the Evaluation Mode). For \"Line-by-Line\" Evaluation Mode, " + + "the value will be prepended to each line. For \"Entire Text\" evaluation mode, the value will be prepended to the entire text."); + static final AllowableValue APPEND = new AllowableValue(appendValue, appendValue, + "Insert the Replacement Value at the end of the FlowFile or the end of each line (depending on the Evluation Mode). For \"Line-by-Line\" Evaluation Mode, " + + "the value will be appended to each line. For \"Entire Text\" evaluation mode, the value will be appended to the entire text."); + static final AllowableValue LITERAL_REPLACE = new AllowableValue(literalReplaceValue, literalReplaceValue, + "Search for all instances of the Search Value and replace the matches with the Replacement Value."); + static final AllowableValue REGEX_REPLACE = new AllowableValue(regexReplaceValue, regexReplaceValue, + "Interpret the Search Value as a Regular Expression and replace all matches with the Replacement Value. The Replacement Value may reference Capturing Groups used " + + "in the Search Value by using a dollar-sign followed by the Capturing Group number, such as $1 or $2. If the Search Value is set to .* then everything is replaced without " + + "even evaluating the Regular Expression."); + + public static final PropertyDescriptor SEARCH_VALUE = new PropertyDescriptor.Builder() + .name("Regular Expression") + .displayName("Search Value") + .description("The Search Value to search for in the FlowFile content. Only used for 'Literal Replace' and 'Regex Replace' matching strategies") + .required(true) + .addValidator(StandardValidators.createRegexValidator(0, Integer.MAX_VALUE, true)) + .expressionLanguageSupported(true) + .defaultValue(DEFAULT_REGEX) + .build(); public static final PropertyDescriptor REPLACEMENT_VALUE = new PropertyDescriptor.Builder() - .name("Replacement Value") - .description("The value to replace the regular expression with. Back-references to Regular Expression capturing groups are supported, but " - + "back-references that reference capturing groups that do not exist in the regular expression will be treated as literal value.") - .required(true) - .defaultValue(DEFAULT_REPLACEMENT_VALUE) - .addValidator(Validator.VALID) - .expressionLanguageSupported(true) - .build(); + .name("Replacement Value") + .description("The value to insert using the 'Replacement Strategy'. Using \"Regex Replace\" back-references to Regular Expression capturing groups " + + "are supported, but back-references that reference capturing groups that do not exist in the regular expression will be treated as literal value.") + .required(true) + .defaultValue(DEFAULT_REPLACEMENT_VALUE) + .addValidator(Validator.VALID) + .expressionLanguageSupported(true) + .build(); public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() - .name("Character Set") - .description("The Character Set in which the file is encoded") - .required(true) - .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) - .defaultValue("UTF-8") - .build(); + .name("Character Set") + .description("The Character Set in which the file is encoded") + .required(true) + .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) + .defaultValue("UTF-8") + .build(); public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor.Builder() - .name("Maximum Buffer Size") - .description("Specifies the maximum amount of data to buffer (per file or per line, depending on the Evaluation Mode) in order to " - + "apply the regular expressions. If 'Entire Text' (in Evaluation Mode) is selected and the FlowFile is larger than this value, " - + "the FlowFile will be routed to 'failure'. " - + "In 'Line-by-Line' Mode, if a single line is larger than this value, the FlowFile will be routed to 'failure'. A default value " - + "of 1 MB is provided, primarily for 'Entire Text' mode. In 'Line-by-Line' Mode, a value such as 8 KB or 16 KB is suggested. " - + "This value is ignored and the buffer is not used if 'Regular Expression' is set to '.*'") - .required(true) - .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) - .defaultValue("1 MB") - .build(); + .name("Maximum Buffer Size") + .description("Specifies the maximum amount of data to buffer (per file or per line, depending on the Evaluation Mode) in order to " + + "apply the regular expressions. If 'Entire Text' (in Evaluation Mode) is selected and the FlowFile is larger than this value, " + + "the FlowFile will be routed to 'failure'. " + + "In 'Line-by-Line' Mode, if a single line is larger than this value, the FlowFile will be routed to 'failure'. A default value " + + "of 1 MB is provided, primarily for 'Entire Text' mode. In 'Line-by-Line' Mode, a value such as 8 KB or 16 KB is suggested. " + + "This value is ignored and the buffer is not used if 'Regular Expression' is set to '.*'") + .required(true) + .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) + .defaultValue("1 MB") + .build(); + public static final PropertyDescriptor REPLACEMENT_STRATEGY = new PropertyDescriptor.Builder() + .name("Replacement Strategy") + .description("The strategy for how and what to replace within the FlowFile's text content.") + .allowableValues(PREPEND, APPEND, REGEX_REPLACE, LITERAL_REPLACE) + .defaultValue(REGEX_REPLACE.getValue()) + .required(true) + .build(); public static final PropertyDescriptor EVALUATION_MODE = new PropertyDescriptor.Builder() - .name("Evaluation Mode") - .description("Evaluate the 'Regular Expression' against each line (Line-by-Line) or buffer the entire file into memory (Entire Text) and " - + "then evaluate the 'Regular Expression'.") - .allowableValues(LINE_BY_LINE, ENTIRE_TEXT) - .defaultValue(ENTIRE_TEXT) - .required(true) - .build(); + .name("Evaluation Mode") + .description("Run the 'Replacement Strategy' against each line separately (Line-by-Line) or buffer the entire file into memory (Entire Text) " + + "and run against that.") + .allowableValues(LINE_BY_LINE, ENTIRE_TEXT) + .defaultValue(ENTIRE_TEXT) + .required(true) + .build(); + // Relationships public static final Relationship REL_SUCCESS = new Relationship.Builder() - .name("success") - .description("FlowFiles that have been successfully updated are routed to this relationship, as well as FlowFiles whose content does not " - + "match the given Regular Expression") - .build(); + .name("success") + .description("FlowFiles that have been successfully processed are routed to this relationship. This includes both FlowFiles that had text" + + " replaced and those that did not.") + .build(); public static final Relationship REL_FAILURE = new Relationship.Builder() - .name("failure") - .description("FlowFiles that could not be updated are routed to this relationship") - .build(); - // + .name("failure") + .description("FlowFiles that could not be updated are routed to this relationship") + .build(); + private List properties; private Set relationships; @Override protected void init(final ProcessorInitializationContext context) { final List properties = new ArrayList<>(); - properties.add(REGEX); + properties.add(SEARCH_VALUE); properties.add(REPLACEMENT_VALUE); properties.add(CHARACTER_SET); properties.add(MAX_BUFFER_SIZE); + properties.add(REPLACEMENT_STRATEGY); properties.add(EVALUATION_MODE); this.properties = Collections.unmodifiableList(properties); @@ -169,22 +198,18 @@ public class ReplaceText extends AbstractProcessor { } final ProcessorLog logger = getLogger(); - final String unsubstitutedRegex = context.getProperty(REGEX).getValue(); + + final String unsubstitutedRegex = context.getProperty(SEARCH_VALUE).getValue(); String unsubstitutedReplacement = context.getProperty(REPLACEMENT_VALUE).getValue(); - if (unsubstitutedRegex.equals(DEFAULT_REGEX) && unsubstitutedReplacement.equals(DEFAULT_REPLACEMENT_VALUE)) { + final String replacementStrategy = context.getProperty(REPLACEMENT_STRATEGY).getValue(); + + if (replacementStrategy.equalsIgnoreCase(regexReplaceValue) && unsubstitutedRegex.equals(DEFAULT_REGEX) && unsubstitutedReplacement.equals(DEFAULT_REPLACEMENT_VALUE)) { // This pattern says replace content with itself. We can highly optimize this process by simply transferring // all FlowFiles to the 'success' relationship session.transfer(flowFiles, REL_SUCCESS); return; } - final AttributeValueDecorator quotedAttributeDecorator = new AttributeValueDecorator() { - @Override - public String decorate(final String attributeValue) { - return Pattern.quote(attributeValue); - } - }; - final AttributeValueDecorator escapeBackRefDecorator = new AttributeValueDecorator() { @Override public String decorate(final String attributeValue) { @@ -192,7 +217,7 @@ public class ReplaceText extends AbstractProcessor { } }; - final String regexValue = context.getProperty(REGEX).evaluateAttributeExpressions().getValue(); + final String regexValue = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions().getValue(); final int numCapturingGroups = Pattern.compile(regexValue).matcher("").groupCount(); final boolean skipBuffer = ".*".equals(unsubstitutedRegex); @@ -200,9 +225,13 @@ public class ReplaceText extends AbstractProcessor { final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue()); final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue(); - final byte[] buffer = skipBuffer ? ZERO_BYTE_BUFFER : new byte[maxBufferSize]; - final String evaluateMode = context.getProperty(EVALUATION_MODE).getValue(); + final byte[] buffer; + if (replacementStrategy.equalsIgnoreCase(regexReplaceValue) || replacementStrategy.equalsIgnoreCase(literalReplaceValue)) { + buffer = new byte[maxBufferSize]; + } else { + buffer = null; + } for (FlowFile flowFile : flowFiles) { if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { @@ -212,47 +241,175 @@ public class ReplaceText extends AbstractProcessor { } } - String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, escapeBackRefDecorator).getValue(); - final Matcher backRefMatcher = backReferencePattern.matcher(replacement); - while (backRefMatcher.find()) { - final String backRefNum = backRefMatcher.group(1); - if (backRefNum.startsWith("0")) { - continue; - } - final int originalBackRefIndex = Integer.parseInt(backRefNum); - int backRefIndex = originalBackRefIndex; + String replacement; + if (!replacementStrategy.equals(regexReplaceValue)) { + replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue(); + } else { + replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, escapeBackRefDecorator).getValue(); + final Matcher backRefMatcher = backReferencePattern.matcher(replacement); + while (backRefMatcher.find()) { + final String backRefNum = backRefMatcher.group(1); + if (backRefNum.startsWith("0")) { + continue; + } + final int originalBackRefIndex = Integer.parseInt(backRefNum); + int backRefIndex = originalBackRefIndex; - // if we have a replacement value like $123, and we have less than 123 capturing groups, then - // we want to truncate the 3 and use capturing group 12; if we have less than 12 capturing groups, - // then we want to truncate the 2 and use capturing group 1; if we don't have a capturing group then - // we want to truncate the 1 and get 0. - while (backRefIndex > numCapturingGroups && backRefIndex >= 10) { - backRefIndex /= 10; - } + // if we have a replacement value like $123, and we have less than 123 capturing groups, then + // we want to truncate the 3 and use capturing group 12; if we have less than 12 capturing groups, + // then we want to truncate the 2 and use capturing group 1; if we don't have a capturing group then + // we want to truncate the 1 and get 0. + while (backRefIndex > numCapturingGroups && backRefIndex >= 10) { + backRefIndex /= 10; + } - if (backRefIndex > numCapturingGroups) { - final StringBuilder sb = new StringBuilder(replacement.length() + 1); - final int groupStart = backRefMatcher.start(1); + if (backRefIndex > numCapturingGroups) { + final StringBuilder sb = new StringBuilder(replacement.length() + 1); + final int groupStart = backRefMatcher.start(1); - sb.append(replacement.substring(0, groupStart - 1)); - sb.append("\\"); - sb.append(replacement.substring(groupStart - 1)); - replacement = sb.toString(); + sb.append(replacement.substring(0, groupStart - 1)); + sb.append("\\"); + sb.append(replacement.substring(groupStart - 1)); + replacement = sb.toString(); + } } } - replacement = replacement.replaceAll("(\\$\\D)", "\\\\$1"); + ReplacementStrategyExecutor replacementStrategyExecutor; + switch (replacementStrategy) { + case prependValue: + replacementStrategyExecutor = new PrependReplace(); + break; + case appendValue: + replacementStrategyExecutor = new AppendReplace(); + break; + case regexReplaceValue: + replacementStrategyExecutor = new RegexReplace(buffer); + break; + case literalReplaceValue: + replacementStrategyExecutor = new LiteralReplace(buffer); + break; + default: + throw new AssertionError(); + } + + final StopWatch stopWatch = new StopWatch(true); + + flowFile = replacementStrategyExecutor.replace(flowFile, session, context, replacement, evaluateMode, + charset, maxBufferSize, skipBuffer); + + logger.info("Transferred {} to 'success'", new Object[] {flowFile}); + session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); + session.transfer(flowFile, REL_SUCCESS); + } + } + + private static class PrependReplace implements ReplacementStrategyExecutor { + + @Override + public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode, + final Charset charset, final int maxBufferSize, final boolean skipBuffer) { + if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + out.write(replacementValue.getBytes(charset)); + IOUtils.copy(in, out); + } + }); + } else { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { + String oneLine; + while (null != (oneLine = br.readLine())) { + final String updatedValue = replacementValue.concat(oneLine); + bw.write(updatedValue); + } + } + } + }); + } + return flowFile; + } + } + + private static class AppendReplace implements ReplacementStrategyExecutor { + + @Override + public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode, + final Charset charset, final int maxBufferSize, final boolean skipBuffer) { + if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + IOUtils.copy(in, out); + out.write(replacementValue.getBytes(charset)); + } + }); + } else { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { + String oneLine; + while (null != (oneLine = br.readLine())) { + // we need to find the first carriage return or new-line so that we can append the new value + // before the line separate. However, we don't want to do this using a regular expression due + // to performance concerns. So we will find the first occurrence of either \r or \n and use + // that to insert the replacement value. + boolean foundNewLine = false; + for (int i = 0; i < oneLine.length(); i++) { + final char c = oneLine.charAt(i); + if (foundNewLine) { + bw.write(c); + continue; + } + + if (c == '\r' || c == '\n') { + bw.write(replacementValue); + foundNewLine = true; + } + + bw.write(c); + } + + if (!foundNewLine) { + bw.write(replacementValue); + } + } + } + } + }); + } + return flowFile; + } + } + + + private static class RegexReplace implements ReplacementStrategyExecutor { + private final byte[] buffer; + + public RegexReplace(final byte[] buffer) { + this.buffer = buffer; + } + + @Override + public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode, + final Charset charset, final int maxBufferSize, final boolean skipBuffer) { + final String replacementFinal = replacementValue.replaceAll("(\\$\\D)", "\\\\$1"); // always match; just overwrite value with the replacement value; this optimization prevents us // from reading the file at all. - final String replacementValue = replacement; if (skipBuffer) { - final StopWatch stopWatch = new StopWatch(true); if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { flowFile = session.write(flowFile, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { - out.write(replacementValue.getBytes(charset)); + out.write(replacementFinal.getBytes(charset)); } }); } else { @@ -260,31 +417,83 @@ public class ReplaceText extends AbstractProcessor { @Override public void process(final InputStream in, final OutputStream out) throws IOException { try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize); - BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { while (null != br.readLine()) { - bw.write(replacementValue); + bw.write(replacementFinal); } } } }); } - session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - session.transfer(flowFile, REL_SUCCESS); - logger.info("Transferred {} to 'success'", new Object[]{flowFile}); - continue; - } + } else { + final AttributeValueDecorator quotedAttributeDecorator = new AttributeValueDecorator() { + @Override + public String decorate(final String attributeValue) { + return Pattern.quote(attributeValue); + } + }; + final String searchRegex = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions(flowFile, quotedAttributeDecorator).getValue(); - final StopWatch stopWatch = new StopWatch(true); - final String regex = context.getProperty(REGEX).evaluateAttributeExpressions(flowFile, quotedAttributeDecorator).getValue(); - - if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { final int flowFileSize = (int) flowFile.getSize(); + if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + StreamUtils.fillBuffer(in, buffer, false); + final String contentString = new String(buffer, 0, flowFileSize, charset); + final String updatedValue = contentString.replaceAll(searchRegex, replacementFinal); + out.write(updatedValue.getBytes(charset)); + } + }); + } else { + flowFile = session.write(flowFile, new StreamCallback() { + @Override + public void process(final InputStream in, final OutputStream out) throws IOException { + try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) { + String oneLine; + while (null != (oneLine = br.readLine())) { + final String updatedValue = oneLine.replaceAll(searchRegex, replacementFinal); + bw.write(updatedValue); + } + } + } + }); + } + } + return flowFile; + } + } + + private static class LiteralReplace implements ReplacementStrategyExecutor { + private final byte[] buffer; + + public LiteralReplace(final byte[] buffer) { + this.buffer = buffer; + } + + @Override + public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode, + final Charset charset, final int maxBufferSize, final boolean skipBuffer) { + final AttributeValueDecorator quotedAttributeDecorator = new AttributeValueDecorator() { + @Override + public String decorate(final String attributeValue) { + return Pattern.quote(attributeValue); + } + }; + + final String searchValue = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions(flowFile, quotedAttributeDecorator).getValue(); + + + final int flowFileSize = (int) flowFile.getSize(); + if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { flowFile = session.write(flowFile, new StreamCallback() { @Override public void process(final InputStream in, final OutputStream out) throws IOException { StreamUtils.fillBuffer(in, buffer, false); final String contentString = new String(buffer, 0, flowFileSize, charset); - final String updatedValue = contentString.replaceAll(regex, replacementValue); + // Interpreting the search and replacement values as char sequences + final String updatedValue = contentString.replace(searchValue, replacementValue); out.write(updatedValue.getBytes(charset)); } }); @@ -293,20 +502,22 @@ public class ReplaceText extends AbstractProcessor { @Override public void process(final InputStream in, final OutputStream out) throws IOException { try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize); - BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) { String oneLine; while (null != (oneLine = br.readLine())) { - final String updatedValue = oneLine.replaceAll(regex, replacementValue); + // Interpreting the search and replacement values as char sequences + final String updatedValue = oneLine.replace(searchValue, replacementValue); bw.write(updatedValue); } } } }); } - - logger.info("Transferred {} to 'success'", new Object[]{flowFile}); - session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); - session.transfer(flowFile, REL_SUCCESS); + return flowFile; } } + + private interface ReplacementStrategyExecutor { + FlowFile replace(FlowFile flowFile, ProcessSession session, ProcessContext context, String replacement, String evaluateMode, Charset charset, int maxBufferSize, boolean skipBuffer); + } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java index ab5f6bec15..3a311a339e 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java @@ -16,25 +16,43 @@ */ package org.apache.nifi.processors.standard; +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; -import java.util.HashMap; -import java.util.Map; - public class TestReplaceText { + @Test + public void testConfigurationCornerCase() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + + runner.run(); + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(Paths.get("src/test/resources/hello.txt")); + } + @Test public void testSimple() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "ell"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "ell"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "lle"); runner.enqueue(Paths.get("src/test/resources/hello.txt")); @@ -45,11 +63,157 @@ public class TestReplaceText { out.assertContentEquals("Hlleo, World!".getBytes("UTF-8")); } + @Test + public void testPrependSimple() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND); + + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("TESTHello, World!".getBytes("UTF-8")); + } + + @Test + public void testPrependLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "_"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + + runner.enqueue("hello\nthere\nmadam".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("_hello\n_there\n_madam".getBytes("UTF-8")); + } + + @Test + public void testAppendSimple() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Hello, World!TEST".getBytes("UTF-8")); + } + + @Test + public void testAppendWithCarriageReturn() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + + runner.enqueue("hello\rthere\rsir".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("hello!\rthere!\rsir!"); + } + + @Test + public void testAppendWithNewLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + + runner.enqueue("hello\nthere\nsir".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("hello!\nthere!\nsir!"); + } + + @Test + public void testAppendWithCarriageReturnNewLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + + runner.enqueue("hello\r\nthere\r\nsir".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("hello!\r\nthere!\r\nsir!"); + } + + @Test + public void testLiteralSimple() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.SEARCH_VALUE, "ell"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "lle"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Hlleo, World!".getBytes("UTF-8")); + } + + @Test + public void testLiteralBackReference() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.SEARCH_VALUE, "ell"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("H[$1]o, World!"); + } + + @Test + public void testLiteral() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".ell."); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "test"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue(Paths.get("src/test/resources/hello.txt")); + runner.run(); + runner.enqueue("H.ell.o, World! .ell.".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 2); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Hello, World!"); + final MockFlowFile out2 = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(1); + out2.assertContentEquals("Htesto, World! test"); + } + @Test public void testBackReference() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]"); runner.enqueue(Paths.get("src/test/resources/hello.txt")); @@ -65,7 +229,7 @@ public class TestReplaceText { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); String expected = "Hell23o, World!"; - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$123"); final Map attributes = new HashMap<>(); @@ -83,7 +247,7 @@ public class TestReplaceText { public void testBackRefWithNoCapturingGroup() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "ell"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "ell"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$0123"); final Map attributes = new HashMap<>(); @@ -97,10 +261,10 @@ public class TestReplaceText { } @Test - public void testAmy3() throws IOException { + public void testReplacementWithExpressionLanguage() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "GoodBye"); final Map attributes = new HashMap<>(); @@ -118,7 +282,7 @@ public class TestReplaceText { public void testReplacementWithExpressionLanguageIsEscaped() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]"); final Map attributes = new HashMap<>(); @@ -136,7 +300,7 @@ public class TestReplaceText { public void testRegexWithExpressionLanguage() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); final Map attributes = new HashMap<>(); @@ -155,7 +319,7 @@ public class TestReplaceText { public void testRegexWithExpressionLanguageIsEscaped() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); final Map attributes = new HashMap<>(); @@ -174,7 +338,7 @@ public class TestReplaceText { public void testBackReferenceWithTooLargeOfIndexIsEscaped() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2"); final Map attributes = new HashMap<>(); @@ -193,7 +357,7 @@ public class TestReplaceText { public void testBackReferenceWithInvalidReferenceIsEscaped() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d"); final Map attributes = new HashMap<>(); @@ -212,7 +376,7 @@ public class TestReplaceText { public void testEscapingDollarSign() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1"); final Map attributes = new HashMap<>(); @@ -231,7 +395,7 @@ public class TestReplaceText { public void testReplaceWithEmptyString() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(ell)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, ""); runner.enqueue(Paths.get("src/test/resources/hello.txt")); @@ -246,7 +410,7 @@ public class TestReplaceText { public void testWithNoMatch() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "Z"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "Z"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning"); runner.enqueue(Paths.get("src/test/resources/hello.txt")); @@ -261,7 +425,7 @@ public class TestReplaceText { public void testWithMultipleMatches() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "l"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "l"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R"); runner.enqueue(Paths.get("src/test/resources/hello.txt")); @@ -276,7 +440,7 @@ public class TestReplaceText { public void testAttributeToContent() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, ".*"); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".*"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); final Map attributes = new HashMap<>(); @@ -294,7 +458,7 @@ public class TestReplaceText { public void testRoutesToFailureIfTooLarge() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "[123]"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "[123]"); runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); @@ -311,7 +475,7 @@ public class TestReplaceText { public void testRoutesToSuccessIfTooLargeButRegexIsDotAsterisk() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, ".*"); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".*"); runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); @@ -330,7 +494,7 @@ public class TestReplaceText { public void testProblematicCase1() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, ".*"); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".*"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${filename}\t${now():format(\"yyyy/MM/dd'T'HHmmss'Z'\")}\t${fileSize}\n"); final Map attributes = new HashMap<>(); @@ -351,7 +515,7 @@ public class TestReplaceText { public void testGetExistingContent() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, "(?s)(^.*)"); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*)"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "attribute header\n\n${filename}\n\ndata header\n\n$1\n\nfooter"); final Map attributes = new HashMap<>(); @@ -371,7 +535,7 @@ public class TestReplaceText { public void testReplaceWithinCurlyBraces() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); runner.setValidateExpressionUsage(false); - runner.setProperty(ReplaceText.REGEX, ".+"); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".+"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "{ ${filename} }"); final Map attributes = new HashMap<>(); @@ -422,4 +586,421 @@ public class TestReplaceText { final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); out.assertContentEquals(defaultValue); } + + /* Line by Line */ + + @Test + public void testSimpleLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "odo"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "ood"); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/food.txt"))); + } + + @Test + public void testPrependSimpleLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST "); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/PrependLineByLineTest.txt"))); + } + + @Test + public void testAppendSimpleLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, " TEST"); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/AppendLineByLineTest.txt"))); + } + + @Test + public void testAppendEndlineCR() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + + runner.enqueue("Hello \rWorld \r".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Hello TEST\rWorld TEST\r".getBytes("UTF-8")); + } + + @Test + public void testAppendEndlineCRLF() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND); + + runner.enqueue("Hello \r\nWorld \r\n".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Hello TEST\r\nWorld TEST\r\n".getBytes("UTF-8")); + } + + @Test + public void testSimpleLiteral() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "odo"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "ood"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/food.txt"))); + } + + @Test + public void testLiteralBackReferenceLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "jo"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/cu[$1]_Po[$1].txt"))); + } + + @Test + public void testLiteralLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".ell."); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "test"); + runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE); + + runner.enqueue("H.ell.o, World! .ell. \n .ell. .ell.".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals("Htesto, World! test \n test test"); + } + + @Test + public void testBackReferenceLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(DODO)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]"); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/[DODO].txt"))); + } + + @Test + public void testReplacementWithExpressionLanguageIsEscapedLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(jo)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]"); + + final Map attributes = new HashMap<>(); + attributes.put("abc", "$1"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/cu[$1]_Po[$1].txt"))); + } + + @Test + public void testRegexWithExpressionLanguageLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); + + final Map attributes = new HashMap<>(); + attributes.put("replaceKey", "Riley"); + attributes.put("replaceValue", "Spider"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Spider.txt"))); + } + + @Test + public void testRegexWithExpressionLanguageIsEscapedLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); + + final Map attributes = new HashMap<>(); + attributes.put("replaceKey", "R.*y"); + attributes.put("replaceValue", "Spider"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + } + + @Test + public void testBackReferenceWithTooLargeOfIndexIsEscapedLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(lu)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2"); + + final Map attributes = new HashMap<>(); + attributes.put("replaceKey", "R.*y"); + attributes.put("replaceValue", "Spiderman"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Blu$2e_clu$2e.txt"))); + } + + @Test + public void testBackReferenceWithInvalidReferenceIsEscapedLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(ew)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d"); + + final Map attributes = new HashMap<>(); + attributes.put("replaceKey", "H.*o"); + attributes.put("replaceValue", "Good-bye"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/D$d_h$d.txt"))); + } + + @Test + public void testEscapingDollarSignLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(DO)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1"); + + final Map attributes = new HashMap<>(); + attributes.put("replaceKey", "H.*o"); + attributes.put("replaceValue", "Good-bye"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/$1$1.txt"))); + } + + @Test + public void testReplaceWithEmptyStringLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "(jo)"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, ""); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/cu_Po.txt"))); + } + + @Test + public void testWithNoMatchLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "Z"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning"); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + } + + @Test + public void testWithMultipleMatchesLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, "l"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R"); + + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"))); + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/BRue_cRue_RiRey.txt"))); + } + + @Test + public void testAttributeToContentLineByLine() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".*"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); + + final Map attributes = new HashMap<>(); + attributes.put("abc", "Good"); + runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes); + + runner.run(); + + runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1); + final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); + out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Good.txt"))); + } + + @Test + public void testAttributeToContentWindows() throws IOException { + final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); + runner.setValidateExpressionUsage(false); + runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE); + runner.setProperty(ReplaceText.SEARCH_VALUE, ".*"); + runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); + + final Map attributes = new HashMap<>(); + attributes.put("abc", "Good"); + runner.enqueue("<<
>>\r\n<>\r\n<<