NIFI-447 adding replacement strategy to ReplaceText processor

Signed-off-by: Mark Payne <markap14@hotmail.com>
This commit is contained in:
Joseph Percivall 2015-10-26 23:15:31 -04:00 committed by Mark Payne
parent 1c1738670c
commit 07238c7913
6 changed files with 954 additions and 477 deletions

View File

@ -190,15 +190,12 @@ language governing permissions and limitations under the License. -->
<artifactId>derby</artifactId> <artifactId>derby</artifactId>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.h2database</groupId> <groupId>com.h2database</groupId>
<artifactId>h2</artifactId> <artifactId>h2</artifactId>
<version>1.4.187</version> <version>1.4.187</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<build> <build>
@ -251,6 +248,8 @@ language governing permissions and limitations under the License. -->
<exclude>src/test/resources/TestReplaceTextLineByLine/cu_Po.txt</exclude> <exclude>src/test/resources/TestReplaceTextLineByLine/cu_Po.txt</exclude>
<exclude>src/test/resources/TestReplaceTextLineByLine/food.txt</exclude> <exclude>src/test/resources/TestReplaceTextLineByLine/food.txt</exclude>
<exclude>src/test/resources/TestReplaceTextLineByLine/testFile.txt</exclude> <exclude>src/test/resources/TestReplaceTextLineByLine/testFile.txt</exclude>
<exclude>src/test/resources/TestReplaceTextLineByLine/AppendLineByLineTest.txt</exclude>
<exclude>src/test/resources/TestReplaceTextLineByLine/PrependLineByLineTest.txt</exclude>
<exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-backreference-mapping.txt</exclude> <exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-backreference-mapping.txt</exclude>
<exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-blank-mapping.txt</exclude> <exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-blank-mapping.txt</exclude>
<exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-escaped-dollar-mapping.txt</exclude> <exclude>src/test/resources/TestReplaceTextWithMapping/color-fruit-escaped-dollar-mapping.txt</exclude>

View File

@ -35,10 +35,12 @@ import java.util.regex.Pattern;
import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.SideEffectFree; import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching; import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.Validator; import org.apache.nifi.components.Validator;
import org.apache.nifi.expression.AttributeValueDecorator; import org.apache.nifi.expression.AttributeValueDecorator;
@ -64,22 +66,40 @@ import org.apache.nifi.util.StopWatch;
@SupportsBatching @SupportsBatching
@InputRequirement(Requirement.INPUT_REQUIRED) @InputRequirement(Requirement.INPUT_REQUIRED)
@Tags({"Text", "Regular Expression", "Update", "Change", "Replace", "Modify", "Regex"}) @Tags({"Text", "Regular Expression", "Update", "Change", "Replace", "Modify", "Regex"})
@CapabilityDescription("Updates the content of a FlowFile by evaluating a Regular Expression against it and replacing the section of " @CapabilityDescription("Updates the content of a FlowFile by evaluating a Regular Expression (regex) against it and replacing the section of "
+ "the content that matches the Regular Expression with some alternate value.") + "the content that matches the Regular Expression with some alternate value.")
public class ReplaceText extends AbstractProcessor { public class ReplaceText extends AbstractProcessor {
//Constants // Constants
public static final String LINE_BY_LINE = "Line-by-Line"; public static final String LINE_BY_LINE = "Line-by-Line";
public static final String ENTIRE_TEXT = "Entire text"; public static final String ENTIRE_TEXT = "Entire text";
// Names of the available replacement strategies. Each string is used both as the value and as the
// display name of the matching AllowableValue, and as a case label when selecting the strategy executor.
public static final String prependValue = "Prepend";
public static final String appendValue = "Append";
public static final String regexReplaceValue = "Regex Replace";
public static final String literalReplaceValue = "Literal Replace";
private final Pattern backReferencePattern = Pattern.compile("\\$(\\d+)"); private final Pattern backReferencePattern = Pattern.compile("\\$(\\d+)");
private static final byte[] ZERO_BYTE_BUFFER = new byte[0];
private static final String DEFAULT_REGEX = "(?s:^.*$)"; private static final String DEFAULT_REGEX = "(?s:^.*$)";
private static final String DEFAULT_REPLACEMENT_VALUE = "$1"; private static final String DEFAULT_REPLACEMENT_VALUE = "$1";
// Properties // Prepend and Append will just insert the replacement value at the beginning or end
public static final PropertyDescriptor REGEX = new PropertyDescriptor.Builder() // Properties PREPEND, APPEND, REGEX_REPLACE, LITERAL_REPLACE
/** Replacement Strategy option: insert the Replacement Value before the content (or before each line). */
static final AllowableValue PREPEND = new AllowableValue(prependValue, prependValue,
"Insert the Replacement Value at the beginning of the FlowFile or the beginning of each line (depending on the Evaluation Mode). For \"Line-by-Line\" Evaluation Mode, "
+ "the value will be prepended to each line. For \"Entire Text\" evaluation mode, the value will be prepended to the entire text.");
/** Replacement Strategy option: insert the Replacement Value after the content (or at the end of each line). */
static final AllowableValue APPEND = new AllowableValue(appendValue, appendValue,
"Insert the Replacement Value at the end of the FlowFile or the end of each line (depending on the Evaluation Mode). For \"Line-by-Line\" Evaluation Mode, "
+ "the value will be appended to each line. For \"Entire Text\" evaluation mode, the value will be appended to the entire text.");
/** Replacement Strategy option: the Search Value is matched as plain text, not as a Regular Expression. */
static final AllowableValue LITERAL_REPLACE = new AllowableValue(literalReplaceValue, literalReplaceValue,
"Search for all instances of the Search Value and replace the matches with the Replacement Value.");
/** Replacement Strategy option (the default): the Search Value is a Regular Expression; back-references such as $1 are allowed. */
static final AllowableValue REGEX_REPLACE = new AllowableValue(regexReplaceValue, regexReplaceValue,
"Interpret the Search Value as a Regular Expression and replace all matches with the Replacement Value. The Replacement Value may reference Capturing Groups used "
+ "in the Search Value by using a dollar-sign followed by the Capturing Group number, such as $1 or $2. If the Search Value is set to .* then everything is replaced without "
+ "even evaluating the Regular Expression.");
public static final PropertyDescriptor SEARCH_VALUE = new PropertyDescriptor.Builder()
.name("Regular Expression") .name("Regular Expression")
.description("The Regular Expression to search for in the FlowFile content") .displayName("Search Value")
.description("The Search Value to search for in the FlowFile content. Only used for 'Literal Replace' and 'Regex Replace' matching strategies")
.required(true) .required(true)
.addValidator(StandardValidators.createRegexValidator(0, Integer.MAX_VALUE, true)) .addValidator(StandardValidators.createRegexValidator(0, Integer.MAX_VALUE, true))
.expressionLanguageSupported(true) .expressionLanguageSupported(true)
@ -87,8 +107,8 @@ public class ReplaceText extends AbstractProcessor {
.build(); .build();
public static final PropertyDescriptor REPLACEMENT_VALUE = new PropertyDescriptor.Builder() public static final PropertyDescriptor REPLACEMENT_VALUE = new PropertyDescriptor.Builder()
.name("Replacement Value") .name("Replacement Value")
.description("The value to replace the regular expression with. Back-references to Regular Expression capturing groups are supported, but " .description("The value to insert using the 'Replacement Strategy'. Using \"Regex Replace\" back-references to Regular Expression capturing groups "
+ "back-references that reference capturing groups that do not exist in the regular expression will be treated as literal value.") + "are supported, but back-references that reference capturing groups that do not exist in the regular expression will be treated as literal value.")
.required(true) .required(true)
.defaultValue(DEFAULT_REPLACEMENT_VALUE) .defaultValue(DEFAULT_REPLACEMENT_VALUE)
.addValidator(Validator.VALID) .addValidator(Validator.VALID)
@ -113,35 +133,44 @@ public class ReplaceText extends AbstractProcessor {
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR) .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
.defaultValue("1 MB") .defaultValue("1 MB")
.build(); .build();
/**
 * Required property selecting how the Replacement Value is applied to the content:
 * Prepend, Append, Regex Replace (the default) or Literal Replace.
 */
public static final PropertyDescriptor REPLACEMENT_STRATEGY = new PropertyDescriptor.Builder()
.name("Replacement Strategy")
.description("The strategy for how and what to replace within the FlowFile's text content.")
.allowableValues(PREPEND, APPEND, REGEX_REPLACE, LITERAL_REPLACE)
.defaultValue(REGEX_REPLACE.getValue())
.required(true)
.build();
public static final PropertyDescriptor EVALUATION_MODE = new PropertyDescriptor.Builder() public static final PropertyDescriptor EVALUATION_MODE = new PropertyDescriptor.Builder()
.name("Evaluation Mode") .name("Evaluation Mode")
.description("Evaluate the 'Regular Expression' against each line (Line-by-Line) or buffer the entire file into memory (Entire Text) and " .description("Run the 'Replacement Strategy' against each line separately (Line-by-Line) or buffer the entire file into memory (Entire Text) "
+ "then evaluate the 'Regular Expression'.") + "and run against that.")
.allowableValues(LINE_BY_LINE, ENTIRE_TEXT) .allowableValues(LINE_BY_LINE, ENTIRE_TEXT)
.defaultValue(ENTIRE_TEXT) .defaultValue(ENTIRE_TEXT)
.required(true) .required(true)
.build(); .build();
// Relationships // Relationships
public static final Relationship REL_SUCCESS = new Relationship.Builder() public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success") .name("success")
.description("FlowFiles that have been successfully updated are routed to this relationship, as well as FlowFiles whose content does not " .description("FlowFiles that have been successfully processed are routed to this relationship. This includes both FlowFiles that had text"
+ "match the given Regular Expression") + " replaced and those that did not.")
.build(); .build();
public static final Relationship REL_FAILURE = new Relationship.Builder() public static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure") .name("failure")
.description("FlowFiles that could not be updated are routed to this relationship") .description("FlowFiles that could not be updated are routed to this relationship")
.build(); .build();
//
private List<PropertyDescriptor> properties; private List<PropertyDescriptor> properties;
private Set<Relationship> relationships; private Set<Relationship> relationships;
@Override @Override
protected void init(final ProcessorInitializationContext context) { protected void init(final ProcessorInitializationContext context) {
final List<PropertyDescriptor> properties = new ArrayList<>(); final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(REGEX); properties.add(SEARCH_VALUE);
properties.add(REPLACEMENT_VALUE); properties.add(REPLACEMENT_VALUE);
properties.add(CHARACTER_SET); properties.add(CHARACTER_SET);
properties.add(MAX_BUFFER_SIZE); properties.add(MAX_BUFFER_SIZE);
properties.add(REPLACEMENT_STRATEGY);
properties.add(EVALUATION_MODE); properties.add(EVALUATION_MODE);
this.properties = Collections.unmodifiableList(properties); this.properties = Collections.unmodifiableList(properties);
@ -169,22 +198,18 @@ public class ReplaceText extends AbstractProcessor {
} }
final ProcessorLog logger = getLogger(); final ProcessorLog logger = getLogger();
final String unsubstitutedRegex = context.getProperty(REGEX).getValue();
final String unsubstitutedRegex = context.getProperty(SEARCH_VALUE).getValue();
String unsubstitutedReplacement = context.getProperty(REPLACEMENT_VALUE).getValue(); String unsubstitutedReplacement = context.getProperty(REPLACEMENT_VALUE).getValue();
if (unsubstitutedRegex.equals(DEFAULT_REGEX) && unsubstitutedReplacement.equals(DEFAULT_REPLACEMENT_VALUE)) { final String replacementStrategy = context.getProperty(REPLACEMENT_STRATEGY).getValue();
if (replacementStrategy.equalsIgnoreCase(regexReplaceValue) && unsubstitutedRegex.equals(DEFAULT_REGEX) && unsubstitutedReplacement.equals(DEFAULT_REPLACEMENT_VALUE)) {
// This pattern says replace content with itself. We can highly optimize this process by simply transferring // This pattern says replace content with itself. We can highly optimize this process by simply transferring
// all FlowFiles to the 'success' relationship // all FlowFiles to the 'success' relationship
session.transfer(flowFiles, REL_SUCCESS); session.transfer(flowFiles, REL_SUCCESS);
return; return;
} }
final AttributeValueDecorator quotedAttributeDecorator = new AttributeValueDecorator() {
@Override
public String decorate(final String attributeValue) {
return Pattern.quote(attributeValue);
}
};
final AttributeValueDecorator escapeBackRefDecorator = new AttributeValueDecorator() { final AttributeValueDecorator escapeBackRefDecorator = new AttributeValueDecorator() {
@Override @Override
public String decorate(final String attributeValue) { public String decorate(final String attributeValue) {
@ -192,7 +217,7 @@ public class ReplaceText extends AbstractProcessor {
} }
}; };
final String regexValue = context.getProperty(REGEX).evaluateAttributeExpressions().getValue(); final String regexValue = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions().getValue();
final int numCapturingGroups = Pattern.compile(regexValue).matcher("").groupCount(); final int numCapturingGroups = Pattern.compile(regexValue).matcher("").groupCount();
final boolean skipBuffer = ".*".equals(unsubstitutedRegex); final boolean skipBuffer = ".*".equals(unsubstitutedRegex);
@ -200,9 +225,13 @@ public class ReplaceText extends AbstractProcessor {
final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue()); final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue(); final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
final byte[] buffer = skipBuffer ? ZERO_BYTE_BUFFER : new byte[maxBufferSize];
final String evaluateMode = context.getProperty(EVALUATION_MODE).getValue(); final String evaluateMode = context.getProperty(EVALUATION_MODE).getValue();
final byte[] buffer;
if (replacementStrategy.equalsIgnoreCase(regexReplaceValue) || replacementStrategy.equalsIgnoreCase(literalReplaceValue)) {
buffer = new byte[maxBufferSize];
} else {
buffer = null;
}
for (FlowFile flowFile : flowFiles) { for (FlowFile flowFile : flowFiles) {
if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
@ -212,7 +241,11 @@ public class ReplaceText extends AbstractProcessor {
} }
} }
String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, escapeBackRefDecorator).getValue(); String replacement;
if (!replacementStrategy.equals(regexReplaceValue)) {
replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue();
} else {
replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, escapeBackRefDecorator).getValue();
final Matcher backRefMatcher = backReferencePattern.matcher(replacement); final Matcher backRefMatcher = backReferencePattern.matcher(replacement);
while (backRefMatcher.find()) { while (backRefMatcher.find()) {
final String backRefNum = backRefMatcher.group(1); final String backRefNum = backRefMatcher.group(1);
@ -240,52 +273,48 @@ public class ReplaceText extends AbstractProcessor {
replacement = sb.toString(); replacement = sb.toString();
} }
} }
}
replacement = replacement.replaceAll("(\\$\\D)", "\\\\$1"); ReplacementStrategyExecutor replacementStrategyExecutor;
switch (replacementStrategy) {
case prependValue:
replacementStrategyExecutor = new PrependReplace();
break;
case appendValue:
replacementStrategyExecutor = new AppendReplace();
break;
case regexReplaceValue:
replacementStrategyExecutor = new RegexReplace(buffer);
break;
case literalReplaceValue:
replacementStrategyExecutor = new LiteralReplace(buffer);
break;
default:
throw new AssertionError();
}
// always match; just overwrite value with the replacement value; this optimization prevents us
// from reading the file at all.
final String replacementValue = replacement;
if (skipBuffer) {
final StopWatch stopWatch = new StopWatch(true); final StopWatch stopWatch = new StopWatch(true);
if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
flowFile = session.write(flowFile, new OutputStreamCallback() { flowFile = replacementStrategyExecutor.replace(flowFile, session, context, replacement, evaluateMode,
@Override charset, maxBufferSize, skipBuffer);
public void process(final OutputStream out) throws IOException {
out.write(replacementValue.getBytes(charset)); logger.info("Transferred {} to 'success'", new Object[] {flowFile});
}
});
} else {
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream in, final OutputStream out) throws IOException {
try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize);
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) {
while (null != br.readLine()) {
bw.write(replacementValue);
}
}
}
});
}
session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS)); session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS); session.transfer(flowFile, REL_SUCCESS);
logger.info("Transferred {} to 'success'", new Object[]{flowFile}); }
continue;
} }
final StopWatch stopWatch = new StopWatch(true); private static class PrependReplace implements ReplacementStrategyExecutor {
final String regex = context.getProperty(REGEX).evaluateAttributeExpressions(flowFile, quotedAttributeDecorator).getValue();
@Override
public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode,
final Charset charset, final int maxBufferSize, final boolean skipBuffer) {
if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) { if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
final int flowFileSize = (int) flowFile.getSize();
flowFile = session.write(flowFile, new StreamCallback() { flowFile = session.write(flowFile, new StreamCallback() {
@Override @Override
public void process(final InputStream in, final OutputStream out) throws IOException { public void process(final InputStream in, final OutputStream out) throws IOException {
StreamUtils.fillBuffer(in, buffer, false); out.write(replacementValue.getBytes(charset));
final String contentString = new String(buffer, 0, flowFileSize, charset); IOUtils.copy(in, out);
final String updatedValue = contentString.replaceAll(regex, replacementValue);
out.write(updatedValue.getBytes(charset));
} }
}); });
} else { } else {
@ -296,17 +325,199 @@ public class ReplaceText extends AbstractProcessor {
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) { BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset));) {
String oneLine; String oneLine;
while (null != (oneLine = br.readLine())) { while (null != (oneLine = br.readLine())) {
final String updatedValue = oneLine.replaceAll(regex, replacementValue); final String updatedValue = replacementValue.concat(oneLine);
bw.write(updatedValue); bw.write(updatedValue);
} }
} }
} }
}); });
} }
return flowFile;
logger.info("Transferred {} to 'success'", new Object[]{flowFile});
session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);
} }
} }
/**
 * Strategy that adds the replacement value after the content: either once at the very end
 * (Entire Text mode) or at the end of every line, just before its line terminator (any other mode).
 */
private static class AppendReplace implements ReplacementStrategyExecutor {

    @Override
    public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode,
            final Charset charset, final int maxBufferSize, final boolean skipBuffer) {
        if (!evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
            // Line-oriented mode: stream the content and append to each line as it is read.
            return session.write(flowFile, new StreamCallback() {
                @Override
                public void process(final InputStream in, final OutputStream out) throws IOException {
                    try (NLKBufferedReader reader = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize);
                            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, charset))) {
                        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                            // The appended value must land before the line separator. A plain character scan is
                            // used rather than a regular expression because of performance concerns.
                            final int terminatorIndex = indexOfLineTerminator(line);
                            if (terminatorIndex < 0) {
                                // No separator on this line: the value simply follows the text.
                                writer.write(line);
                                writer.write(replacementValue);
                            } else {
                                writer.write(line, 0, terminatorIndex);
                                writer.write(replacementValue);
                                writer.write(line, terminatorIndex, line.length() - terminatorIndex);
                            }
                        }
                    }
                }
            });
        }

        // Entire Text mode: copy the content through untouched, then add the value once.
        return session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream in, final OutputStream out) throws IOException {
                IOUtils.copy(in, out);
                out.write(replacementValue.getBytes(charset));
            }
        });
    }

    /**
     * Finds the position of the first carriage return or new-line character.
     *
     * @param line the text to scan
     * @return the index of the first '\r' or '\n', or -1 if the text contains neither
     */
    private static int indexOfLineTerminator(final String line) {
        for (int position = 0; position < line.length(); position++) {
            final char current = line.charAt(position);
            if (current == '\r' || current == '\n') {
                return position;
            }
        }
        return -1;
    }
}
/**
 * Strategy that interprets the Search Value as a Regular Expression and replaces every match
 * with the replacement value.
 */
private static class RegexReplace implements ReplacementStrategyExecutor {
    // Scratch space supplied by the caller; the Entire Text branch reads the FlowFile content into it.
    private final byte[] buffer;

    public RegexReplace(final byte[] buffer) {
        this.buffer = buffer;
    }

    /**
     * Applies the regex replacement to the FlowFile content.
     *
     * @param flowFile the FlowFile whose content is rewritten
     * @param session the session used to write the new content
     * @param context the context from which the Search Value is evaluated against the FlowFile
     * @param replacementValue the replacement text; a '$' followed by a non-digit is escaped before use
     * @param evaluateMode "Entire text" buffers the content; any other value processes line by line
     * @param charset the character set used to decode and encode the content
     * @param maxBufferSize buffer size handed to the line reader
     * @param skipBuffer when true the content is overwritten with the replacement without evaluating the regex
     * @return the updated FlowFile
     */
    @Override
    public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode,
            final Charset charset, final int maxBufferSize, final boolean skipBuffer) {
        final String replacementFinal = replacementValue.replaceAll("(\\$\\D)", "\\\\$1");

        // always match; just overwrite value with the replacement value; this optimization prevents us
        // from reading the file at all.
        if (skipBuffer) {
            if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(final OutputStream out) throws IOException {
                        out.write(replacementFinal.getBytes(charset));
                    }
                });
            } else {
                flowFile = session.write(flowFile, new StreamCallback() {
                    @Override
                    public void process(final InputStream in, final OutputStream out) throws IOException {
                        try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize);
                                BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) {
                            while (null != br.readLine()) {
                                bw.write(replacementFinal);
                            }
                        }
                    }
                });
            }
        } else {
            // Attribute values substituted into the Search Value are quoted so they are matched literally
            // instead of being interpreted as regex syntax.
            final AttributeValueDecorator quotedAttributeDecorator = new AttributeValueDecorator() {
                @Override
                public String decorate(final String attributeValue) {
                    return Pattern.quote(attributeValue);
                }
            };
            final String searchRegex = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions(flowFile, quotedAttributeDecorator).getValue();

            final int flowFileSize = (int) flowFile.getSize();
            if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
                flowFile = session.write(flowFile, new StreamCallback() {
                    @Override
                    public void process(final InputStream in, final OutputStream out) throws IOException {
                        // NOTE(review): assumes the content fits in the buffer; the size check is presumably done by the caller - confirm.
                        StreamUtils.fillBuffer(in, buffer, false);
                        final String contentString = new String(buffer, 0, flowFileSize, charset);
                        final String updatedValue = contentString.replaceAll(searchRegex, replacementFinal);
                        out.write(updatedValue.getBytes(charset));
                    }
                });
            } else {
                flowFile = session.write(flowFile, new StreamCallback() {
                    @Override
                    public void process(final InputStream in, final OutputStream out) throws IOException {
                        try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize);
                                BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) {
                            // Compile once per FlowFile: String.replaceAll would recompile the same regex for every line.
                            final Pattern searchPattern = Pattern.compile(searchRegex);
                            String oneLine;
                            while (null != (oneLine = br.readLine())) {
                                final String updatedValue = searchPattern.matcher(oneLine).replaceAll(replacementFinal);
                                bw.write(updatedValue);
                            }
                        }
                    }
                });
            }
        }
        return flowFile;
    }
}
/**
 * Strategy that treats the Search Value as plain text (not a Regular Expression) and replaces
 * every occurrence of it with the replacement value.
 */
private static class LiteralReplace implements ReplacementStrategyExecutor {
    // Scratch space supplied by the caller; the Entire Text branch reads the FlowFile content into it.
    private final byte[] buffer;

    public LiteralReplace(final byte[] buffer) {
        this.buffer = buffer;
    }

    /**
     * Applies the literal replacement to the FlowFile content.
     *
     * @param flowFile the FlowFile whose content is rewritten
     * @param session the session used to write the new content
     * @param context the context from which the Search Value is evaluated against the FlowFile
     * @param replacementValue the text that replaces each occurrence of the Search Value
     * @param evaluateMode "Entire text" buffers the content; any other value processes line by line
     * @param charset the character set used to decode and encode the content
     * @param maxBufferSize buffer size handed to the line reader
     * @param skipBuffer not used by this strategy
     * @return the updated FlowFile
     */
    @Override
    public FlowFile replace(FlowFile flowFile, final ProcessSession session, final ProcessContext context, final String replacementValue, final String evaluateMode,
            final Charset charset, final int maxBufferSize, final boolean skipBuffer) {
        // The search text is used with String.replace, which matches literally. Attribute values must
        // therefore NOT be wrapped with Pattern.quote: the \Q...\E markers it adds would become part
        // of the literal search text and the real content would never match.
        final String searchValue = context.getProperty(SEARCH_VALUE).evaluateAttributeExpressions(flowFile).getValue();

        final int flowFileSize = (int) flowFile.getSize();
        if (evaluateMode.equalsIgnoreCase(ENTIRE_TEXT)) {
            flowFile = session.write(flowFile, new StreamCallback() {
                @Override
                public void process(final InputStream in, final OutputStream out) throws IOException {
                    // NOTE(review): assumes the content fits in the buffer; the size check is presumably done by the caller - confirm.
                    StreamUtils.fillBuffer(in, buffer, false);
                    final String contentString = new String(buffer, 0, flowFileSize, charset);
                    // Interpreting the search and replacement values as char sequences
                    final String updatedValue = contentString.replace(searchValue, replacementValue);
                    out.write(updatedValue.getBytes(charset));
                }
            });
        } else {
            flowFile = session.write(flowFile, new StreamCallback() {
                @Override
                public void process(final InputStream in, final OutputStream out) throws IOException {
                    try (NLKBufferedReader br = new NLKBufferedReader(new InputStreamReader(in, charset), maxBufferSize);
                            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, charset))) {
                        String oneLine;
                        while (null != (oneLine = br.readLine())) {
                            // Interpreting the search and replacement values as char sequences
                            final String updatedValue = oneLine.replace(searchValue, replacementValue);
                            bw.write(updatedValue);
                        }
                    }
                }
            });
        }
        return flowFile;
    }
}
/**
 * Contract shared by the replacement strategies (prepend, append, regex replace, literal replace):
 * rewrite the content of one FlowFile and hand back the resulting FlowFile.
 */
private interface ReplacementStrategyExecutor {

    /**
     * Rewrites the content of the given FlowFile.
     *
     * @param flowFile the FlowFile to modify
     * @param session the session through which the content is written
     * @param context the processor context, available for evaluating further properties
     * @param replacement the replacement text to apply
     * @param evaluateMode the configured Evaluation Mode value
     * @param charset the character set of the content
     * @param maxBufferSize the configured maximum buffer size
     * @param skipBuffer whether the content may be overwritten without being read
     * @return the FlowFile returned by the session after the write
     */
    FlowFile replace(
            FlowFile flowFile,
            ProcessSession session,
            ProcessContext context,
            String replacement,
            String evaluateMode,
            Charset charset,
            int maxBufferSize,
            boolean skipBuffer);
}
} }

View File

@ -16,25 +16,43 @@
*/ */
package org.apache.nifi.processors.standard; package org.apache.nifi.processors.standard;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners; import org.apache.nifi.util.TestRunners;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
public class TestReplaceText { public class TestReplaceText {
/**
 * With no properties set, the processor must pass the content through unchanged,
 * including when it is first triggered with an empty queue.
 */
@Test
public void testConfigurationCornerCase() throws IOException {
    final Path helloFile = Paths.get("src/test/resources/hello.txt");
    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);

    // First trigger happens before anything is queued.
    testRunner.run();

    testRunner.enqueue(helloFile);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(Paths.get("src/test/resources/hello.txt"));
}
@Test @Test
public void testSimple() throws IOException { public void testSimple() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "ell"); runner.setProperty(ReplaceText.SEARCH_VALUE, "ell");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "lle"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "lle");
runner.enqueue(Paths.get("src/test/resources/hello.txt")); runner.enqueue(Paths.get("src/test/resources/hello.txt"));
@ -45,11 +63,157 @@ public class TestReplaceText {
out.assertContentEquals("Hlleo, World!".getBytes("UTF-8")); out.assertContentEquals("Hlleo, World!".getBytes("UTF-8"));
} }
/**
 * Prepend strategy with the default evaluation mode: the value is placed once, in front of the content.
 */
@Test
public void testPrependSimple() throws IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND);

    final Path helloFile = Paths.get("src/test/resources/hello.txt");
    testRunner.enqueue(helloFile);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final byte[] expectedContent = "TESTHello, World!".getBytes("UTF-8");
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(expectedContent);
}
/**
 * Prepend strategy in Line-by-Line mode: the value is placed in front of every line.
 */
@Test
public void testPrependLineByLine() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "_");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);

    // Encode the input explicitly instead of relying on the platform default charset,
    // so it always agrees with the UTF-8 encoded expectation below.
    runner.enqueue("hello\nthere\nmadam".getBytes(StandardCharsets.UTF_8));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("_hello\n_there\n_madam".getBytes("UTF-8"));
}
/**
 * Append strategy with the default evaluation mode: the value is placed once, after the content.
 */
@Test
public void testAppendSimple() throws IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);

    final Path helloFile = Paths.get("src/test/resources/hello.txt");
    testRunner.enqueue(helloFile);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final byte[] expectedContent = "Hello, World!TEST".getBytes("UTF-8");
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(expectedContent);
}
/**
 * Append strategy in Line-by-Line mode with '\r' separators: the value goes before each carriage return
 * and after the final, unterminated line.
 */
@Test
public void testAppendWithCarriageReturn() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);

    // Encode the input explicitly instead of relying on the platform default charset.
    runner.enqueue("hello\rthere\rsir".getBytes(StandardCharsets.UTF_8));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("hello!\rthere!\rsir!");
}
/**
 * Append strategy in Line-by-Line mode with '\n' separators: the value goes before each new-line
 * and after the final, unterminated line.
 */
@Test
public void testAppendWithNewLine() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);

    // Encode the input explicitly instead of relying on the platform default charset.
    runner.enqueue("hello\nthere\nsir".getBytes(StandardCharsets.UTF_8));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("hello!\nthere!\nsir!");
}
/**
 * Verifies that the APPEND strategy in LINE_BY_LINE mode adds the replacement
 * value to the end of each line when lines are separated by a carriage
 * return followed by a line feed, and that the separators are preserved.
 *
 * @throws IOException if the content cannot be encoded or compared
 */
@Test
public void testAppendWithCarriageReturnNewLine() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "!");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);

    // Explicit charset keeps the input independent of the platform default encoding.
    runner.enqueue("hello\r\nthere\r\nsir".getBytes("UTF-8"));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("hello!\r\nthere!\r\nsir!");
}
/**
 * Checks the LITERAL_REPLACE strategy with a plain search string: the
 * occurrence of "ell" in hello.txt is expected to become "lle".
 *
 * @throws IOException if the input file cannot be read or the expected bytes cannot be encoded
 */
@Test
public void testLiteralSimple() throws IOException {
    final byte[] expectedContent = "Hlleo, World!".getBytes("UTF-8");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "ell");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "lle");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    testRunner.enqueue(Paths.get("src/test/resources/hello.txt"));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(expectedContent);
}
/**
 * Checks that under LITERAL_REPLACE a replacement value that looks like a
 * back reference is written to the output verbatim rather than expanded.
 *
 * @throws IOException if the input file cannot be read
 */
@Test
public void testLiteralBackReference() throws IOException {
    final String expectedContent = "H[$1]o, World!";

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "ell");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    testRunner.enqueue(Paths.get("src/test/resources/hello.txt"));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(expectedContent);
}
/**
 * Verifies that under LITERAL_REPLACE the search value is matched character
 * for character instead of being interpreted as a regular expression.
 * <p>
 * Two flow files are processed: hello.txt, which is expected to come out as
 * "Hello, World!" (no literal ".ell." to replace), and an in-memory payload
 * containing the literal text ".ell.", each occurrence of which is expected
 * to be replaced by "test".
 *
 * @throws IOException if the input cannot be read or encoded
 */
@Test
public void testLiteral() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.SEARCH_VALUE, ".ell.");
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "test");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    runner.enqueue(Paths.get("src/test/resources/hello.txt"));
    runner.run();

    // Explicit charset keeps the input independent of the platform default encoding.
    runner.enqueue("H.ell.o, World! .ell.".getBytes("UTF-8"));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 2);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("Hello, World!");
    final MockFlowFile out2 = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(1);
    out2.assertContentEquals("Htesto, World! test");
}
@Test @Test
public void testBackReference() throws IOException { public void testBackReference() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]");
runner.enqueue(Paths.get("src/test/resources/hello.txt")); runner.enqueue(Paths.get("src/test/resources/hello.txt"));
@ -65,7 +229,7 @@ public class TestReplaceText {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
String expected = "Hell23o, World!"; String expected = "Hell23o, World!";
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$123"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$123");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -83,7 +247,7 @@ public class TestReplaceText {
public void testBackRefWithNoCapturingGroup() throws IOException { public void testBackRefWithNoCapturingGroup() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "ell"); runner.setProperty(ReplaceText.SEARCH_VALUE, "ell");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$0123"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$0123");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -97,10 +261,10 @@ public class TestReplaceText {
} }
@Test @Test
public void testAmy3() throws IOException { public void testReplacementWithExpressionLanguage() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "GoodBye"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "GoodBye");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -118,7 +282,7 @@ public class TestReplaceText {
public void testReplacementWithExpressionLanguageIsEscaped() throws IOException { public void testReplacementWithExpressionLanguageIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -136,7 +300,7 @@ public class TestReplaceText {
public void testRegexWithExpressionLanguage() throws IOException { public void testRegexWithExpressionLanguage() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -155,7 +319,7 @@ public class TestReplaceText {
public void testRegexWithExpressionLanguageIsEscaped() throws IOException { public void testRegexWithExpressionLanguageIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "${replaceKey}"); runner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -174,7 +338,7 @@ public class TestReplaceText {
public void testBackReferenceWithTooLargeOfIndexIsEscaped() throws IOException { public void testBackReferenceWithTooLargeOfIndexIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -193,7 +357,7 @@ public class TestReplaceText {
public void testBackReferenceWithInvalidReferenceIsEscaped() throws IOException { public void testBackReferenceWithInvalidReferenceIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -212,7 +376,7 @@ public class TestReplaceText {
public void testEscapingDollarSign() throws IOException { public void testEscapingDollarSign() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -231,7 +395,7 @@ public class TestReplaceText {
public void testReplaceWithEmptyString() throws IOException { public void testReplaceWithEmptyString() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(ell)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(ell)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, ""); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "");
runner.enqueue(Paths.get("src/test/resources/hello.txt")); runner.enqueue(Paths.get("src/test/resources/hello.txt"));
@ -246,7 +410,7 @@ public class TestReplaceText {
public void testWithNoMatch() throws IOException { public void testWithNoMatch() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "Z"); runner.setProperty(ReplaceText.SEARCH_VALUE, "Z");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning");
runner.enqueue(Paths.get("src/test/resources/hello.txt")); runner.enqueue(Paths.get("src/test/resources/hello.txt"));
@ -261,7 +425,7 @@ public class TestReplaceText {
public void testWithMultipleMatches() throws IOException { public void testWithMultipleMatches() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "l"); runner.setProperty(ReplaceText.SEARCH_VALUE, "l");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R");
runner.enqueue(Paths.get("src/test/resources/hello.txt")); runner.enqueue(Paths.get("src/test/resources/hello.txt"));
@ -276,7 +440,7 @@ public class TestReplaceText {
public void testAttributeToContent() throws IOException { public void testAttributeToContent() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, ".*"); runner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -294,7 +458,7 @@ public class TestReplaceText {
public void testRoutesToFailureIfTooLarge() throws IOException { public void testRoutesToFailureIfTooLarge() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "[123]"); runner.setProperty(ReplaceText.SEARCH_VALUE, "[123]");
runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b"); runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");
@ -311,7 +475,7 @@ public class TestReplaceText {
public void testRoutesToSuccessIfTooLargeButRegexIsDotAsterisk() throws IOException { public void testRoutesToSuccessIfTooLargeButRegexIsDotAsterisk() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, ".*"); runner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b"); runner.setProperty(ReplaceText.MAX_BUFFER_SIZE, "1 b");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");
@ -330,7 +494,7 @@ public class TestReplaceText {
public void testProblematicCase1() throws IOException { public void testProblematicCase1() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, ".*"); runner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${filename}\t${now():format(\"yyyy/MM/dd'T'HHmmss'Z'\")}\t${fileSize}\n"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${filename}\t${now():format(\"yyyy/MM/dd'T'HHmmss'Z'\")}\t${fileSize}\n");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -351,7 +515,7 @@ public class TestReplaceText {
public void testGetExistingContent() throws IOException { public void testGetExistingContent() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, "(?s)(^.*)"); runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "attribute header\n\n${filename}\n\ndata header\n\n$1\n\nfooter"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "attribute header\n\n${filename}\n\ndata header\n\n$1\n\nfooter");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -371,7 +535,7 @@ public class TestReplaceText {
public void testReplaceWithinCurlyBraces() throws IOException { public void testReplaceWithinCurlyBraces() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText()); final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false); runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.REGEX, ".+"); runner.setProperty(ReplaceText.SEARCH_VALUE, ".+");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "{ ${filename} }"); runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "{ ${filename} }");
final Map<String, String> attributes = new HashMap<>(); final Map<String, String> attributes = new HashMap<>();
@ -422,4 +586,421 @@ public class TestReplaceText {
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0); final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(defaultValue); out.assertContentEquals(defaultValue);
} }
/* Line by Line */
/**
 * Runs a plain search/replace ("odo" to "ood") in LINE_BY_LINE mode over
 * testFile.txt and compares the result against food.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testSimpleLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/food.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "odo");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "ood");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Applies the PREPEND strategy in LINE_BY_LINE mode to testFile.txt and
 * compares the result against PrependLineByLineTest.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testPrependSimpleLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/PrependLineByLineTest.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.PREPEND);
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST ");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Applies the APPEND strategy in LINE_BY_LINE mode to testFile.txt and
 * compares the result against AppendLineByLineTest.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testAppendSimpleLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/AppendLineByLineTest.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, " TEST");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Verifies that the APPEND strategy in LINE_BY_LINE mode inserts the
 * replacement value before each trailing carriage return, including the one
 * that terminates the final line.
 *
 * @throws IOException if the content cannot be encoded or compared
 */
@Test
public void testAppendEndlineCR() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);

    // Encode explicitly: the expectation below is UTF-8, so the input must not
    // depend on the platform default charset.
    runner.enqueue("Hello \rWorld \r".getBytes("UTF-8"));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("Hello TEST\rWorld TEST\r".getBytes("UTF-8"));
}
/**
 * Verifies that the APPEND strategy in LINE_BY_LINE mode inserts the
 * replacement value before each trailing carriage-return/line-feed pair,
 * including the pair that terminates the final line.
 *
 * @throws IOException if the content cannot be encoded or compared
 */
@Test
public void testAppendEndlineCRLF() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.APPEND);

    // Encode explicitly: the expectation below is UTF-8, so the input must not
    // depend on the platform default charset.
    runner.enqueue("Hello \r\nWorld \r\n".getBytes("UTF-8"));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("Hello TEST\r\nWorld TEST\r\n".getBytes("UTF-8"));
}
/**
 * Runs a LITERAL_REPLACE of "odo" with "ood" in LINE_BY_LINE mode over
 * testFile.txt and compares the result against food.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testSimpleLiteral() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/food.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "odo");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "ood");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Runs a LITERAL_REPLACE of "jo" with a replacement that looks like a back
 * reference, in LINE_BY_LINE mode, and compares the result against the
 * cu[$1]_Po[$1].txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testLiteralBackReferenceLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/cu[$1]_Po[$1].txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "jo");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]");
    testRunner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Verifies that under LITERAL_REPLACE in LINE_BY_LINE mode the search value
 * ".ell." is matched character for character on every line, with each
 * occurrence replaced by "test" and the line separator left in place.
 *
 * @throws IOException if the content cannot be encoded or compared
 */
@Test
public void testLiteralLineByLine() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    runner.setProperty(ReplaceText.SEARCH_VALUE, ".ell.");
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "test");
    runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.LITERAL_REPLACE);

    // Explicit charset keeps the input independent of the platform default encoding.
    runner.enqueue("H.ell.o, World! .ell. \n .ell. .ell.".getBytes("UTF-8"));
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("Htesto, World! test \n test test");
}
/**
 * Runs a search with the capturing group "(DODO)" and a back-reference
 * replacement in LINE_BY_LINE mode, comparing the result against the
 * [DODO].txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testBackReferenceLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/[DODO].txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(DODO)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Supplies the replacement through an expression whose attribute value is
 * "$1" and, in LINE_BY_LINE mode, compares the result against the
 * cu[$1]_Po[$1].txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testReplacementWithExpressionLanguageIsEscapedLineByLine() throws IOException {
    // Attribute consumed by the ${abc} expression in the replacement value.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("abc", "$1");

    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/cu[$1]_Po[$1].txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(jo)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Supplies both the search value and the replacement value through
 * flow-file attributes ("Riley" and "Spider") in LINE_BY_LINE mode and
 * compares the result against Spider.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testRegexWithExpressionLanguageLineByLine() throws IOException {
    // Attributes consumed by the ${replaceKey} / ${replaceValue} expressions.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("replaceKey", "Riley");
    flowFileAttributes.put("replaceValue", "Spider");

    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/Spider.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Supplies a search value through an attribute that contains regex
 * metacharacters ("R.*y") in LINE_BY_LINE mode and expects the output to
 * equal the unmodified input file.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testRegexWithExpressionLanguageIsEscapedLineByLine() throws IOException {
    // Attributes consumed by the ${replaceKey} / ${replaceValue} expressions.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("replaceKey", "R.*y");
    flowFileAttributes.put("replaceValue", "Spider");

    // The expected output is the input file itself.
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "${replaceKey}");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Uses a replacement that references group 2 while the search value "(lu)"
 * defines only one group, in LINE_BY_LINE mode, and compares the result
 * against the Blu$2e_clu$2e.txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testBackReferenceWithTooLargeOfIndexIsEscapedLineByLine() throws IOException {
    // NOTE(review): neither property configured below references these
    // attributes; they are still attached to the flow file as in the original.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("replaceKey", "R.*y");
    flowFileAttributes.put("replaceValue", "Spiderman");

    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/Blu$2e_clu$2e.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(lu)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Uses the replacement "$d", which is not a numeric group reference, with
 * the search value "(ew)" in LINE_BY_LINE mode and compares the result
 * against the D$d_h$d.txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testBackReferenceWithInvalidReferenceIsEscapedLineByLine() throws IOException {
    // NOTE(review): neither property configured below references these
    // attributes; they are still attached to the flow file as in the original.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("replaceKey", "H.*o");
    flowFileAttributes.put("replaceValue", "Good-bye");

    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/D$d_h$d.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(ew)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Uses a replacement whose dollar sign is preceded by a backslash, with the
 * search value "(DO)" in LINE_BY_LINE mode, and compares the result against
 * the $1$1.txt resource.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testEscapingDollarSignLineByLine() throws IOException {
    // NOTE(review): neither property configured below references these
    // attributes; they are still attached to the flow file as in the original.
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("replaceKey", "H.*o");
    flowFileAttributes.put("replaceValue", "Good-bye");

    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/$1$1.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(DO)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Replaces every match of "(jo)" with the empty string in LINE_BY_LINE mode
 * and compares the result against cu_Po.txt.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testReplaceWithEmptyStringLineByLine() throws IOException {
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/cu_Po.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "(jo)");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * Searches for "Z" in LINE_BY_LINE mode and expects the output to equal the
 * unmodified input file.
 *
 * @throws IOException if a resource file cannot be read
 */
@Test
public void testWithNoMatchLineByLine() throws IOException {
    // The expected output is the input file itself.
    final File expectedFile = new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "Z");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning");

    testRunner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * In LINE_BY_LINE evaluation mode, every occurrence of {@code l} must be replaced
 * by {@code R}; the result is compared against BRue_cRue_RiRey.txt.
 */
@Test
public void testWithMultipleMatchesLineByLine() throws IOException {
    final String resourceDir = "src/test/resources/TestReplaceTextLineByLine/";

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, "l");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R");

    final Path inputPath = Paths.get(resourceDir + "testFile.txt");
    testRunner.enqueue(translateNewLines(inputPath));
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);

    final File expectedFile = new File(resourceDir + "BRue_cRue_RiRey.txt");
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    result.assertContentEquals(translateNewLines(expectedFile));
}
/**
 * In LINE_BY_LINE evaluation mode, replacing {@code .*} with the expression
 * {@code ${abc}} (attribute value "Good") must produce the content of Good.txt.
 */
@Test
public void testAttributeToContentLineByLine() throws IOException {
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("abc", "Good");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");

    final byte[] inputBytes = translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"));
    testRunner.enqueue(inputBytes, flowFileAttributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    final byte[] expectedBytes = translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Good.txt"));
    result.assertContentEquals(expectedBytes);
}
/**
 * Feeds content that uses Windows-style line endings ({@code \r\n}, plus a trailing
 * lone {@code \r}) through LINE_BY_LINE mode with search value {@code .*} and
 * replacement {@code ${abc}} (attribute value "Good").
 *
 * The expected output is exactly "GoodGoodGood": one replacement per input line,
 * with no line terminators left over.
 */
@Test
public void testAttributeToContentWindows() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    runner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "Good");

    // Encode with an explicit charset so the test input does not depend on the
    // platform default encoding.
    final byte[] windowsContent = "<<<HEADER>>>\r\n<<BODY>>\r\n<<<FOOTER>>>\r".getBytes(StandardCharsets.UTF_8);
    runner.enqueue(windowsContent, attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    out.assertContentEquals("GoodGoodGood");
}
/**
 * Replaces every line (search value {@code .*}) with a tab-separated record built
 * from the {@code filename} attribute, a formatted {@code now()} timestamp and
 * {@code fileSize}, then checks only the start and the end of the output because
 * the timestamp in the middle varies from run to run.
 */
@Test
public void testProblematicCase1LineByLine() throws IOException {
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("filename", "abc.txt");

    final TestRunner testRunner = TestRunners.newTestRunner(new ReplaceText());
    testRunner.setValidateExpressionUsage(false);
    testRunner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    testRunner.setProperty(ReplaceText.SEARCH_VALUE, ".*");
    testRunner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${filename}\t${now():format(\"yyyy/MM/dd'T'HHmmss'Z'\")}\t${fileSize}\n");

    final byte[] inputBytes = translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt"));
    testRunner.enqueue(inputBytes, flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);

    final MockFlowFile result = testRunner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    final String rawContent = new String(result.toByteArray(), StandardCharsets.UTF_8);
    final String outContent = translateNewLines(rawContent);

    Assert.assertTrue(outContent.startsWith("abc.txt\t"));
    System.out.println(outContent);

    // Two endings are accepted; presumably 193 is the input size with LF line
    // endings and 203 the size with CRLF line endings -- TODO confirm.
    final boolean endsWithLfSize = outContent.endsWith("193\n");
    final boolean endsWithCrLfSize = outContent.endsWith("203\r\n");
    Assert.assertTrue(endsWithLfSize || endsWithCrLfSize);
}
/**
 * Wraps each input line in a header/footer template using back reference
 * {@code $1} for the existing line content and {@code ${filename}} for an
 * attribute value.
 *
 * The input is the two lines "Hello\n" and "World!"; the expected output repeats
 * the template once per line. The first line keeps its own trailing newline inside
 * {@code $1}, which is why the first expected record has one more {@code \n}
 * before "footer" than the second.
 */
@Test
public void testGetExistingContentLineByLine() throws IOException {
    final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
    runner.setValidateExpressionUsage(false);
    runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
    runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*)");
    runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "attribute header\n\n${filename}\n\ndata header\n\n$1\n\nfooter\n");

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "abc.txt");

    // Explicit charset: the input bytes must not depend on the platform default encoding.
    runner.enqueue("Hello\nWorld!".getBytes(StandardCharsets.UTF_8), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
    final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
    final String outContent = new String(out.toByteArray(), StandardCharsets.UTF_8);

    final String expectedContent = "attribute header\n\nabc.txt\n\ndata header\n\nHello\n\n\nfooter\n"
        + "attribute header\n\nabc.txt\n\ndata header\n\nWorld!\n\nfooter\n";
    // assertEquals reports both strings on failure, so no separate debug print is needed.
    Assert.assertEquals(expectedContent, outContent);
}
/**
 * Convenience overload: converts the file to a {@link Path} and delegates to
 * {@link #translateNewLines(Path)}.
 *
 * @param file the file to read
 * @return the file content as returned by the {@code Path} overload
 * @throws IOException if the file cannot be read
 */
private byte[] translateNewLines(final File file) throws IOException {
    final Path filePath = file.toPath();
    return translateNewLines(filePath);
}
/**
 * Reads the whole file as UTF-8 text, runs it through
 * {@link #translateNewLines(String)} and re-encodes the result as UTF-8.
 *
 * @param path the file to read
 * @return the translated content as UTF-8 bytes
 * @throws IOException if the file cannot be read
 */
private byte[] translateNewLines(final Path path) throws IOException {
    final String decoded = new String(Files.readAllBytes(path), StandardCharsets.UTF_8);
    final String translated = translateNewLines(decoded);
    return translated.getBytes(StandardCharsets.UTF_8);
}
/**
 * Replaces every line-feed character in {@code text} with the value of the
 * {@code line.separator} system property.
 *
 * Only {@code \n} is matched, so a carriage return that already precedes a
 * line feed is left in place.
 *
 * @param text the text to translate
 * @return the text with each {@code \n} replaced by the platform line separator
 */
private String translateNewLines(final String text) {
    final Pattern lineFeed = Pattern.compile("\n", Pattern.MULTILINE);
    return lineFeed.matcher(text).replaceAll(System.getProperty("line.separator"));
}
} }

View File

@ -1,336 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.standard;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Test;
public class TestReplaceTextLineByLine {
@Test
public void testSimple() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "odo");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "ood");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/food.txt")));
}
@Test
public void testBackReference() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(DODO)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[$1]");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/[DODO].txt")));
}
@Test
public void testReplacementWithExpressionLanguageIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(jo)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "[${abc}]");
final Map<String, String> attributes = new HashMap<>();
attributes.put("abc", "$1");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/cu[$1]_Po[$1].txt")));
}
@Test
public void testRegexWithExpressionLanguage() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "${replaceKey}");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("replaceKey", "Riley");
attributes.put("replaceValue", "Spider");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Spider.txt")));
}
@Test
public void testRegexWithExpressionLanguageIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "${replaceKey}");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${replaceValue}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("replaceKey", "R.*y");
attributes.put("replaceValue", "Spider");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
}
@Test
public void testBackReferenceWithTooLargeOfIndexIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(lu)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$1$2");
final Map<String, String> attributes = new HashMap<>();
attributes.put("replaceKey", "R.*y");
attributes.put("replaceValue", "Spiderman");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Blu$2e_clu$2e.txt")));
}
@Test
public void testBackReferenceWithInvalidReferenceIsEscaped() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(ew)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "$d");
final Map<String, String> attributes = new HashMap<>();
attributes.put("replaceKey", "H.*o");
attributes.put("replaceValue", "Good-bye");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/D$d_h$d.txt")));
}
@Test
public void testEscapingDollarSign() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(DO)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\\$1");
final Map<String, String> attributes = new HashMap<>();
attributes.put("replaceKey", "H.*o");
attributes.put("replaceValue", "Good-bye");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/$1$1.txt")));
}
@Test
public void testReplaceWithEmptyString() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(jo)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/cu_Po.txt")));
}
@Test
public void testWithNoMatch() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "Z");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "Morning");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
}
@Test
public void testWithMultipleMatches() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "l");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "R");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")));
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/BRue_cRue_RiRey.txt")));
}
@Test
public void testAttributeToContent() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, ".*");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("abc", "Good");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals(translateNewLines(new File("src/test/resources/TestReplaceTextLineByLine/Good.txt")));
}
@Test
public void testAttributeToContentWindows() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, ".*");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${abc}");
final Map<String, String> attributes = new HashMap<>();
attributes.put("abc", "Good");
runner.enqueue("<<<HEADER>>>\r\n<<BODY>>\r\n<<<FOOTER>>>\r".getBytes(), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
out.assertContentEquals("GoodGoodGood");
}
@Test
public void testProblematicCase1() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, ".*");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${filename}\t${now():format(\"yyyy/MM/dd'T'HHmmss'Z'\")}\t${fileSize}\n");
final Map<String, String> attributes = new HashMap<>();
attributes.put("filename", "abc.txt");
runner.enqueue(translateNewLines(Paths.get("src/test/resources/TestReplaceTextLineByLine/testFile.txt")), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
final String outContent = translateNewLines(new String(out.toByteArray(), StandardCharsets.UTF_8));
Assert.assertTrue(outContent.startsWith("abc.txt\t"));
System.out.println(outContent);
Assert.assertTrue(outContent.endsWith("193\n") || outContent.endsWith("203\r\n"));
}
@Test
public void testGetExistingContent() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ReplaceText());
runner.setValidateExpressionUsage(false);
runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.LINE_BY_LINE);
runner.setProperty(ReplaceText.REGEX, "(?s)(^.*)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "attribute header\n\n${filename}\n\ndata header\n\n$1\n\nfooter\n");
final Map<String, String> attributes = new HashMap<>();
attributes.put("filename", "abc.txt");
runner.enqueue("Hello\nWorld!".getBytes(), attributes);
runner.run();
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
final String outContent = new String(out.toByteArray(), StandardCharsets.UTF_8);
System.out.println(outContent);
Assert.assertTrue(outContent.equals("attribute header\n\nabc.txt\n\ndata header\n\nHello\n\n\nfooter\n"
+ "attribute header\n\nabc.txt\n\ndata header\n\nWorld!\n\nfooter\n"));
}
private byte[] translateNewLines(final File file) throws IOException {
return translateNewLines(file.toPath());
}
private byte[] translateNewLines(final Path path) throws IOException {
final byte[] data = Files.readAllBytes(path);
final String text = new String(data, StandardCharsets.UTF_8);
return translateNewLines(text).getBytes(StandardCharsets.UTF_8);
}
private String translateNewLines(final String text) {
final String lineSeparator = System.getProperty("line.separator");
final Pattern pattern = Pattern.compile("\n", Pattern.MULTILINE);
final String translated = pattern.matcher(text).replaceAll(lineSeparator);
return translated;
}
}

View File

@ -0,0 +1,11 @@
<<<HEADER>>> TEST
Fodo DODO cujo Pojo TEST
Blue Dew clue hew TEST
Grampa Riley Huey TEST
Fodo DODO cujo Pojo TEST
Blue Dew clue hew TEST
Grampa Riley Huey TEST
Fodo DODO cujo Pojo TEST
Blue Dew clue hew TEST
Grampa Riley Huey TEST
<<<FOOTER>>> TEST

View File

@ -0,0 +1,11 @@
TEST <<<HEADER>>>
TEST Fodo DODO cujo Pojo
TEST Blue Dew clue hew
TEST Grampa Riley Huey
TEST Fodo DODO cujo Pojo
TEST Blue Dew clue hew
TEST Grampa Riley Huey
TEST Fodo DODO cujo Pojo
TEST Blue Dew clue hew
TEST Grampa Riley Huey
TEST <<<FOOTER>>>