diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java index c4e55e745e..692446b0bf 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -31,7 +32,6 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; @@ -43,6 +43,9 @@ import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; import org.apache.nifi.annotation.lifecycle.OnStopped; import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.components.Validator; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.processor.AbstractProcessor; @@ -62,26 +65,26 @@ import org.apache.nifi.stream.io.StreamUtils; @Tags({"evaluate", "extract", "Text", "Regular Expression", "regex"}) @CapabilityDescription( "Evaluates one or more Regular Expressions against the content of a FlowFile. " - + "The results of those Regular Expressions are assigned to FlowFile Attributes. " - + "Regular Expressions are entered by adding user-defined properties; " - + "the name of the property maps to the Attribute Name into which the result will be placed. " - + "The first capture group, if any found, will be placed into that attribute name." - + "But all capture groups, including the matching string sequence itself will also be " - + "provided at that attribute name with an index value provided, with the exception of a capturing group " - + "that is optional and does not match - for example, given the attribute name \"regex\" and expression " - + "\"abc(def)?(g)\" we would add an attribute \"regex.1\" with a value of \"def\" if the \"def\" matched. If " - + "the \"def\" did not match, no attribute named \"regex.1\" would be added but an attribute named \"regex.2\" " - + "with a value of \"g\" will be added regardless." - + "The value of the property must be a valid Regular Expressions with one or more capturing groups. " - + "If the Regular Expression matches more than once, only the first match will be used unless the property " - + "enabling repeating capture group is set to true. " - + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. " - + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' " - + "and no attributes will be applied to the FlowFile.") + + "The results of those Regular Expressions are assigned to FlowFile Attributes. " + + "Regular Expressions are entered by adding user-defined properties; " + + "the name of the property maps to the Attribute Name into which the result will be placed. " + + "The first capture group, if any found, will be placed into that attribute name." + + "But all capture groups, including the matching string sequence itself will also be " + + "provided at that attribute name with an index value provided, with the exception of a capturing group " + + "that is optional and does not match - for example, given the attribute name \"regex\" and expression " + + "\"abc(def)?(g)\" we would add an attribute \"regex.1\" with a value of \"def\" if the \"def\" matched. If " + + "the \"def\" did not match, no attribute named \"regex.1\" would be added but an attribute named \"regex.2\" " + + "with a value of \"g\" will be added regardless." + + "The value of the property must be a valid Regular Expressions with one or more capturing groups. " + + "If the Regular Expression matches more than once, only the first match will be used unless the property " + + "enabling repeating capture group is set to true. " + + "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. " + + "If no provided Regular Expression matches, the FlowFile will be routed to 'unmatched' " + + "and no attributes will be applied to the FlowFile.") @DynamicProperty(name = "A FlowFile attribute", value = "A Regular Expression with one or more capturing group", description = "The first capture group, if any found, will be placed into that attribute name." - + "But all capture groups, including the matching string sequence itself will also be " - + "provided at that attribute name with an index value provided.") + + "But all capture groups, including the matching string sequence itself will also be " + + "provided at that attribute name with an index value provided.") public class ExtractText extends AbstractProcessor { public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() @@ -259,12 +262,38 @@ public class ExtractText extends AbstractProcessor { return new PropertyDescriptor.Builder() .name(propertyDescriptorName) .expressionLanguageSupported(false) - .addValidator(StandardValidators.createRegexValidator(1, 40, true)) + .addValidator(StandardValidators.createRegexValidator(0, 40, true)) .required(false) .dynamic(true) .build(); } + @Override + protected Collection customValidate(final ValidationContext validationContext) { + final List problems = new ArrayList<>(super.customValidate(validationContext)); + + // If the capture group zero is not going to be included, each dynamic property must have at least one group + final boolean includeCaptureGroupZero = validationContext.getProperty(INCLUDE_CAPTURE_GROUP_ZERO).getValue().equalsIgnoreCase("true"); + getLogger().debug("Include capture group zero is " + includeCaptureGroupZero); + if (!includeCaptureGroupZero) { + final Validator oneGroupMinimumValidator = StandardValidators.createRegexValidator(1, 40, true); + for (Map.Entry prop : validationContext.getProperties().entrySet()) { + PropertyDescriptor pd = prop.getKey(); + if (pd.isDynamic()) { + String value = validationContext.getProperty(pd).getValue(); + getLogger().debug("Evaluating dynamic property " + pd.getDisplayName() + " (" + pd.getName() + ") with value " + value); + ValidationResult result = oneGroupMinimumValidator.validate(pd.getDisplayName(), value, validationContext); + getLogger().debug("Validation result: " + result.toString()); + if (!result.isValid()) { + problems.add(result); + } + } + } + } + + return problems; + } + @OnScheduled public final void onScheduled(final ProcessContext context) throws IOException { final Map compiledPatternsMap = new HashMap<>(); @@ -338,7 +367,7 @@ public class ExtractText extends AbstractProcessor { final String baseKey = entry.getKey(); int start = j == 0 ? startGroupIdx : 1; for (int i = start; i <= matcher.groupCount(); i++) { - final String key = new StringBuilder(baseKey).append(".").append(i+j).toString(); + final String key = new StringBuilder(baseKey).append(".").append(i + j).toString(); String value = matcher.group(i); if (value != null && !value.isEmpty()) { if (value.length() > maxCaptureGroupLength) { @@ -351,7 +380,7 @@ public class ExtractText extends AbstractProcessor { } } j += matcher.groupCount(); - if(!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) { + if (!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) { break; } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java index 93d5147089..e323c2d957 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue; import java.io.UnsupportedEncodingException; import java.util.Set; import java.util.regex.Pattern; - import org.apache.nifi.processor.Relationship; import org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; @@ -216,14 +215,6 @@ public class TestExtractText { out.assertAttributeEquals("regex.result7", null); } - @Test(expected = java.lang.AssertionError.class) - public void testNoCaptureGroups() throws UnsupportedEncodingException { - final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); - testRunner.setProperty("regex.result1", ".*"); - testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); - testRunner.run(); - } - @Test public void testNoFlowFile() throws UnsupportedEncodingException { final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); @@ -422,4 +413,39 @@ public class TestExtractText { out.assertAttributeNotExists(attributeKey + ".0"); out.assertAttributeEquals(attributeKey, SAMPLE_STRING); } + + @Test + public void testShouldAllowNoCaptureGroups() throws Exception { + // Arrange + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); + final String attributeKey = "regex.result"; + testRunner.setProperty(attributeKey, "(?s).*"); + + // Act + testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); + testRunner.run(); + + // Assert + testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1); + final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0); + + // There is no global capture group, so only "key.0" exists + out.assertAttributeNotExists(attributeKey); + out.assertAttributeEquals(attributeKey + ".0", SAMPLE_STRING); + } + + @Test(expected = java.lang.AssertionError.class) + public void testShouldNotAllowNoCaptureGroupsIfZeroDisabled() throws Exception { + // Arrange + final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText()); + testRunner.setProperty(ExtractText.INCLUDE_CAPTURE_GROUP_ZERO, "false"); + final String attributeKey = "regex.result"; + testRunner.setProperty(attributeKey, "(?s).*"); + + // Act + testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8")); + + // Validation should fail because nothing will match + testRunner.run(); + } }