From ad18853b589d80331e2f4574bce35d79bce09c28 Mon Sep 17 00:00:00 2001 From: joewitt Date: Wed, 18 Mar 2015 10:59:13 -0400 Subject: [PATCH] NIFI-399 initial port --- .../standard/EvaluateRegularExpression.java | 7 +- .../nifi/processors/standard/ExtractText.java | 294 ++++++++++++++++++ .../org.apache.nifi.processor.Processor | 1 + ...arExpression.java => TestExtractText.java} | 92 +++--- 4 files changed, 345 insertions(+), 49 deletions(-) create mode 100644 nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java rename nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/{TestEvaluateRegularExpression.java => TestExtractText.java} (80%) diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java index 4140943547..bb2e31a84c 100644 --- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EvaluateRegularExpression.java @@ -52,9 +52,10 @@ import org.apache.commons.lang3.StringUtils; @EventDriven @SideEffectFree @SupportsBatching -@Tags({"evaluate", "Text", "Regular Expression", "regex", "experimental"}) +@Tags({"deprecated"}) @CapabilityDescription( - "Evaluates one or more Regular Expressions against the content of a FlowFile. " + "WARNING: This has been deprecated and will be removed in 0.2.0. Use ExtractText instead.\n" + + "Evaluates one or more Regular Expressions against the content of a FlowFile. 
" + "The results of those Regular Expressions are assigned to FlowFile Attributes. " + "Regular Expressions are entered by adding user-defined properties; " + "the name of the property maps to the Attribute Name into which the result will be placed. " @@ -62,7 +63,7 @@ import org.apache.commons.lang3.StringUtils; + "If the Regular Expression matches more than once, only the first match will be used. " + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. " + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' and no attributes will be applied to the FlowFile.") - +@Deprecated public class EvaluateRegularExpression extends AbstractProcessor { public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java new file mode 100644 index 0000000000..6c914d899d --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.standard;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.stream.io.StreamUtils;
+import org.apache.nifi.logging.ProcessorLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.DataUnit;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+
+import org.apache.commons.lang3.StringUtils;
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@Tags({"evaluate", "extract", "Text", "Regular Expression", "regex"})
+@CapabilityDescription(
+        "Evaluates one or more Regular Expressions against the content of a FlowFile. "
+        + "The results of those Regular Expressions are assigned to FlowFile Attributes. "
+        + "Regular Expressions are entered by adding user-defined properties; "
+        + "the name of the property maps to the Attribute Name into which the result will be placed. "
+        + "The value of the property must be a valid Regular Expression with exactly one capturing group. "
+        + "If the Regular Expression matches more than once, only the first match will be used. "
+        + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. "
+        + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' and no attributes will be applied to the FlowFile.")
+public class ExtractText extends AbstractProcessor {
+
+    public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()
+            .name("Character Set")
+            .description("The Character Set in which the file is encoded")
+            .required(true)
+            .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+            .defaultValue("UTF-8")
+            .build();
+
+    public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor.Builder()
+            .name("Maximum Buffer Size")
+            .description("Specifies the maximum amount of data to buffer (per file) in order to apply the regular expressions. Files larger than the specified maximum will not be fully evaluated.")
+            .required(true)
+            .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
+            .defaultValue("1 MB")
+            .build();
+
+    public static final PropertyDescriptor CANON_EQ = new PropertyDescriptor.Builder()
+            .name("Enable Canonical Equivalence")
+            .description("Indicates that two characters match only when their full canonical decompositions match.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor CASE_INSENSITIVE = new PropertyDescriptor.Builder()
+            .name("Enable Case-insensitive Matching")
+            .description("Indicates that two characters match even if they are in a different case. Can also be specified via the embedded flag (?i).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor COMMENTS = new PropertyDescriptor.Builder()
+            .name("Permit Whitespace and Comments in Pattern")
+            .description("In this mode, whitespace is ignored, and embedded comments starting with # are ignored until the end of a line. Can also be specified via the embedded flag (?x).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor DOTALL = new PropertyDescriptor.Builder()
+            .name("Enable DOTALL Mode")
+            .description("Indicates that the expression '.' should match any character, including a line terminator. Can also be specified via the embedded flag (?s).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor LITERAL = new PropertyDescriptor.Builder()
+            .name("Enable Literal Parsing of the Pattern")
+            .description("Indicates that Metacharacters and escape characters should be given no special meaning.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor MULTILINE = new PropertyDescriptor.Builder()
+            .name("Enable Multiline Mode")
+            .description("Indicates that '^' and '$' should match just after and just before a line terminator or end of sequence, instead of only the beginning or end of the entire input. Can also be specified via the embedded flag (?m).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNICODE_CASE = new PropertyDescriptor.Builder()
+            .name("Enable Unicode-aware Case Folding")
+            .description("When used with 'Enable Case-insensitive Matching', matches in a manner consistent with the Unicode Standard. Can also be specified via the embedded flag (?u).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNICODE_CHARACTER_CLASS = new PropertyDescriptor.Builder()
+            .name("Enable Unicode Predefined Character Classes")
+            .description("Specifies conformance with the Unicode Technical Standard #18: Unicode Regular Expression Annex C: Compatibility Properties. Can also be specified via the embedded flag (?U).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor UNIX_LINES = new PropertyDescriptor.Builder()
+            .name("Enable Unix Lines Mode")
+            .description("Indicates that only the '\\n' line terminator is recognized in the behavior of '.', '^', and '$'. Can also be specified via the embedded flag (?d).")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .build();
+
+    public static final Relationship REL_MATCH = new Relationship.Builder()
+            .name("matched")
+            .description(
+                    "FlowFiles are routed to this relationship when the Regular Expression is successfully evaluated and the FlowFile "
+                    + "is modified as a result")
+            .build();
+
+    public static final Relationship REL_NO_MATCH = new Relationship.Builder()
+            .name("unmatched")
+            .description(
+                    "FlowFiles are routed to this relationship when no provided Regular Expression matches the content of the FlowFile")
+            .build();
+
+    private Set<Relationship> relationships;
+    private List<PropertyDescriptor> properties;
+
+    @Override
+    protected void init(final ProcessorInitializationContext context) {
+        final Set<Relationship> relationships = new HashSet<>();
+        relationships.add(REL_MATCH);
+        relationships.add(REL_NO_MATCH);
+        this.relationships = Collections.unmodifiableSet(relationships);
+
+        final List<PropertyDescriptor> properties = new ArrayList<>();
+        properties.add(CHARACTER_SET);
+        properties.add(MAX_BUFFER_SIZE);
+        properties.add(CANON_EQ);
+        properties.add(CASE_INSENSITIVE);
+        properties.add(COMMENTS);
+        properties.add(DOTALL);
+        properties.add(LITERAL);
+        properties.add(MULTILINE);
+        properties.add(UNICODE_CASE);
+        properties.add(UNICODE_CHARACTER_CLASS);
+        properties.add(UNIX_LINES);
+        this.properties = Collections.unmodifiableList(properties);
+    }
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return relationships;
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return properties;
+    }
+
+    @Override
+    protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+        return new PropertyDescriptor.Builder()
+                .name(propertyDescriptorName)
+                .expressionLanguageSupported(false)
+                .addValidator(StandardValidators.createRegexValidator(1, 1, true))
+                .required(false)
+                .dynamic(true)
+                .build();
+    }
+
+    /**
+     * Pulls up to 50 FlowFiles and, for each, decodes at most 'Maximum Buffer Size' bytes of
+     * content and applies every dynamic-property regex. A non-blank capture group 1 of the
+     * first match becomes an attribute named after the property. FlowFiles that gain an
+     * attribute are routed to 'matched'; all others are routed to 'unmatched'.
+     */
+    @Override
+    public void onTrigger(final ProcessContext context, final ProcessSession session) {
+        final List<FlowFile> flowFileBatch = session.get(50);
+        if (flowFileBatch.isEmpty()) {
+            return;
+        }
+        final ProcessorLog logger = getLogger();
+
+        // Compile the Regular Expressions; the flags are the same for all of them, so build them once
+        final int flags = getCompileFlags(context);
+        final Map<String, Matcher> regexMap = new HashMap<>();
+        for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
+            if (!entry.getKey().isDynamic()) {
+                continue;
+            }
+            final Matcher matcher = Pattern.compile(entry.getValue(), flags).matcher("");
+            regexMap.put(entry.getKey().getName(), matcher);
+        }
+
+        final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
+        final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
+
+        for (FlowFile flowFile : flowFileBatch) {
+            final Map<String, String> regexResults = new HashMap<>();
+            final byte[] buffer = new byte[maxBufferSize];
+            session.read(flowFile, new InputStreamCallback() {
+                @Override
+                public void process(InputStream in) throws IOException {
+                    StreamUtils.fillBuffer(in, buffer, false);
+                }
+            });
+            // Take the minimum before narrowing to int: narrowing a size above Integer.MAX_VALUE first would wrap
+            final int flowFileSize = (int) Math.min(flowFile.getSize(), maxBufferSize);
+            final String contentString = new String(buffer, 0, flowFileSize, charset);
+            for (final Map.Entry<String, Matcher> entry : regexMap.entrySet()) {
+                final Matcher matcher = entry.getValue();
+                matcher.reset(contentString);
+                if (matcher.find()) {
+                    final String group = matcher.group(1);
+                    if (!StringUtils.isBlank(group)) {
+                        regexResults.put(entry.getKey(), group);
+                    }
+                }
+            }
+
+            if (!regexResults.isEmpty()) {
+                flowFile = session.putAllAttributes(flowFile, regexResults);
+                session.getProvenanceReporter().modifyAttributes(flowFile);
+                session.transfer(flowFile, REL_MATCH);
+                logger.info("Matched {} Regular Expressions and added attributes to FlowFile {}", new Object[]{regexResults.size(), flowFile});
+            } else {
+                session.transfer(flowFile, REL_NO_MATCH);
+                logger.info("Did not match any Regular Expressions for FlowFile {}", new Object[]{flowFile});
+            }
+        } // end flowFileLoop
+    }
+
+    /**
+     * Combines the boolean flag properties into the bit mask for {@link Pattern#compile(String, int)}.
+     *
+     * @param context supplies the current values of the flag properties
+     * @return the bitwise OR of the {@link Pattern} constants whose property is true, or 0 if none is
+     */
+    int getCompileFlags(ProcessContext context) {
+        return (context.getProperty(UNIX_LINES).asBoolean() ? Pattern.UNIX_LINES : 0)
+                | (context.getProperty(CASE_INSENSITIVE).asBoolean() ? Pattern.CASE_INSENSITIVE : 0)
+                | (context.getProperty(COMMENTS).asBoolean() ? Pattern.COMMENTS : 0)
+                | (context.getProperty(MULTILINE).asBoolean() ? Pattern.MULTILINE : 0)
+                | (context.getProperty(LITERAL).asBoolean() ? Pattern.LITERAL : 0)
+                | (context.getProperty(DOTALL).asBoolean() ? Pattern.DOTALL : 0)
+                | (context.getProperty(UNICODE_CASE).asBoolean() ? Pattern.UNICODE_CASE : 0)
+                | (context.getProperty(CANON_EQ).asBoolean() ? Pattern.CANON_EQ : 0)
+                | (context.getProperty(UNICODE_CHARACTER_CLASS).asBoolean() ? Pattern.UNICODE_CHARACTER_CLASS : 0);
+    }
+}
diff --git a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
index f81ccec9c4..7fbd7817a6 100644
--- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
+++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor
@@ -25,6 +25,7 @@ org.apache.nifi.processors.standard.EvaluateXPath
 org.apache.nifi.processors.standard.EvaluateXQuery
 org.apache.nifi.processors.standard.ExecuteStreamCommand
 org.apache.nifi.processors.standard.ExecuteProcess
+org.apache.nifi.processors.standard.ExtractText
 org.apache.nifi.processors.standard.GenerateFlowFile
 org.apache.nifi.processors.standard.GetFile
 org.apache.nifi.processors.standard.GetFTP
diff --git
a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java similarity index 80% rename from nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java rename to nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java index c1e5b3ceb5..355d2552f7 100644 --- a/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestEvaluateRegularExpression.java +++ b/nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java @@ -16,7 +16,7 @@ */ package org.apache.nifi.processors.standard; -import org.apache.nifi.processors.standard.EvaluateRegularExpression; +import org.apache.nifi.processors.standard.ExtractText; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -31,14 +31,14 @@ import org.apache.nifi.util.TestRunners; import org.junit.Test; -public class TestEvaluateRegularExpression { +public class TestExtractText { final String SAMPLE_STRING = "foo\r\nbar1\r\nbar2\r\nbar3\r\nhello\r\nworld\r\n"; @Test public void testProcessor() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); testRunner.setProperty("regex.result1", "(?s)(.*)"); testRunner.setProperty("regex.result2", "(?s).*(bar1).*"); @@ -51,8 +51,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); - 
testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); out.assertAttributeEquals("regex.result1", SAMPLE_STRING); out.assertAttributeEquals("regex.result2", "bar1"); out.assertAttributeEquals("regex.result3", "bar1"); @@ -65,9 +65,9 @@ public class TestEvaluateRegularExpression { @Test public void testProcessorWithDotall() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); - testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true"); + testRunner.setProperty(ExtractText.DOTALL, "true"); testRunner.setProperty("regex.result1", "(.*)"); testRunner.setProperty("regex.result2", ".*(bar1).*"); @@ -80,8 +80,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); out.assertAttributeEquals("regex.result1", SAMPLE_STRING); out.assertAttributeEquals("regex.result2", "bar1"); out.assertAttributeEquals("regex.result3", "bar1"); @@ -95,9 +95,9 @@ public class TestEvaluateRegularExpression { @Test public void testProcessorWithMultiline() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new 
ExtractText()); - testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true"); + testRunner.setProperty(ExtractText.MULTILINE, "true"); testRunner.setProperty("regex.result1", "(.*)"); testRunner.setProperty("regex.result2", "(bar1)"); @@ -112,8 +112,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); out.assertAttributeEquals("regex.result1", "foo"); // matches everything on the first line out.assertAttributeEquals("regex.result2", "bar1"); out.assertAttributeEquals("regex.result3", "bar1"); @@ -128,10 +128,10 @@ public class TestEvaluateRegularExpression { @Test public void testProcessorWithMultilineAndDotall() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); - testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true"); - testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true"); + testRunner.setProperty(ExtractText.MULTILINE, "true"); + testRunner.setProperty(ExtractText.DOTALL, "true"); testRunner.setProperty("regex.result1", "(.*)"); testRunner.setProperty("regex.result2", "(bar1)"); @@ -146,8 +146,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); 
+ final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); out.assertAttributeEquals("regex.result1", SAMPLE_STRING); out.assertAttributeEquals("regex.result2", "bar1"); @@ -163,10 +163,10 @@ public class TestEvaluateRegularExpression { @Test public void testProcessorWithNoMatches() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); - testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true"); - testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true"); + testRunner.setProperty(ExtractText.MULTILINE, "true"); + testRunner.setProperty(ExtractText.DOTALL, "true"); testRunner.setProperty("regex.result2", "(bar1)"); testRunner.setProperty("regex.result3", ".*?(bar\\d).*"); @@ -179,8 +179,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue("YYY".getBytes("UTF-8")); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_NO_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_NO_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_NO_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_NO_MATCH).get(0); out.assertAttributeEquals("regex.result1", null); out.assertAttributeEquals("regex.result2", null); @@ -195,7 +195,7 @@ public class TestEvaluateRegularExpression { @Test(expected = java.lang.AssertionError.class) public void testNoCaptureGroups() throws UnsupportedEncodingException { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); testRunner.setProperty("regex.result1", ".*"); testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); @@ -203,15 +203,15 @@ public class 
TestEvaluateRegularExpression { @Test public void testNoFlowFile() throws UnsupportedEncodingException { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 0); } @Test(expected = java.lang.AssertionError.class) public void testTooManyCaptureGroups() throws UnsupportedEncodingException { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); testRunner.setProperty("regex.result1", "(.)(.)"); testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); @@ -219,9 +219,9 @@ public class TestEvaluateRegularExpression { @Test public void testMatchOutsideBuffer() throws Exception { - final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateRegularExpression()); + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); - testRunner.setProperty(EvaluateRegularExpression.MAX_BUFFER_SIZE, "3 B");//only read the first 3 chars ("foo") + testRunner.setProperty(ExtractText.MAX_BUFFER_SIZE, "3 B");//only read the first 3 chars ("foo") testRunner.setProperty("regex.result1", "(foo)"); testRunner.setProperty("regex.result2", "(world)"); @@ -229,8 +229,8 @@ public class TestEvaluateRegularExpression { testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); testRunner.run(); - testRunner.assertAllFlowFilesTransferred(EvaluateRegularExpression.REL_MATCH, 1); - final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateRegularExpression.REL_MATCH).get(0); + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); 
out.assertAttributeEquals("regex.result1", "foo"); out.assertAttributeEquals("regex.result2", null); // null because outsk @@ -239,7 +239,7 @@ public class TestEvaluateRegularExpression { @Test public void testGetCompileFlags() { - final EvaluateRegularExpression processor = new EvaluateRegularExpression(); + final ExtractText processor = new ExtractText(); TestRunner testRunner; int flags; @@ -250,60 +250,60 @@ public class TestEvaluateRegularExpression { // UNIX_LINES testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.UNIX_LINES, "true"); + testRunner.setProperty(ExtractText.UNIX_LINES, "true"); assertEquals(Pattern.UNIX_LINES, processor.getCompileFlags(testRunner.getProcessContext())); // CASE_INSENSITIVE testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.CASE_INSENSITIVE, "true"); + testRunner.setProperty(ExtractText.CASE_INSENSITIVE, "true"); assertEquals(Pattern.CASE_INSENSITIVE, processor.getCompileFlags(testRunner.getProcessContext())); // COMMENTS testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.COMMENTS, "true"); + testRunner.setProperty(ExtractText.COMMENTS, "true"); assertEquals(Pattern.COMMENTS, processor.getCompileFlags(testRunner.getProcessContext())); // MULTILINE testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true"); + testRunner.setProperty(ExtractText.MULTILINE, "true"); assertEquals(Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext())); // LITERAL testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.LITERAL, "true"); + testRunner.setProperty(ExtractText.LITERAL, "true"); assertEquals(Pattern.LITERAL, processor.getCompileFlags(testRunner.getProcessContext())); // DOTALL testRunner = TestRunners.newTestRunner(processor); - 
testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true"); + testRunner.setProperty(ExtractText.DOTALL, "true"); assertEquals(Pattern.DOTALL, processor.getCompileFlags(testRunner.getProcessContext())); // UNICODE_CASE testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.UNICODE_CASE, "true"); + testRunner.setProperty(ExtractText.UNICODE_CASE, "true"); assertEquals(Pattern.UNICODE_CASE, processor.getCompileFlags(testRunner.getProcessContext())); // CANON_EQ testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.CANON_EQ, "true"); + testRunner.setProperty(ExtractText.CANON_EQ, "true"); assertEquals(Pattern.CANON_EQ, processor.getCompileFlags(testRunner.getProcessContext())); // UNICODE_CHARACTER_CLASS testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.UNICODE_CHARACTER_CLASS, "true"); + testRunner.setProperty(ExtractText.UNICODE_CHARACTER_CLASS, "true"); assertEquals(Pattern.UNICODE_CHARACTER_CLASS, processor.getCompileFlags(testRunner.getProcessContext())); // DOTALL and MULTILINE testRunner = TestRunners.newTestRunner(processor); - testRunner.setProperty(EvaluateRegularExpression.DOTALL, "true"); - testRunner.setProperty(EvaluateRegularExpression.MULTILINE, "true"); + testRunner.setProperty(ExtractText.DOTALL, "true"); + testRunner.setProperty(ExtractText.MULTILINE, "true"); assertEquals(Pattern.DOTALL | Pattern.MULTILINE, processor.getCompileFlags(testRunner.getProcessContext())); } @Test public void testGetRelationShips() throws Exception { - final EvaluateRegularExpression processor = new EvaluateRegularExpression(); + final ExtractText processor = new ExtractText(); final TestRunner testRunner = TestRunners.newTestRunner(processor); // testRunner.setProperty("regex.result1", "(.*)"); @@ -311,8 +311,8 @@ public class TestEvaluateRegularExpression { testRunner.run(); Set relationships = 
processor.getRelationships(); - assertTrue(relationships.contains(EvaluateRegularExpression.REL_MATCH)); - assertTrue(relationships.contains(EvaluateRegularExpression.REL_NO_MATCH)); + assertTrue(relationships.contains(ExtractText.REL_MATCH)); + assertTrue(relationships.contains(ExtractText.REL_NO_MATCH)); assertEquals(2, relationships.size()); }