diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
index 2862c342c9..29b9c2099d 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java
@@ -170,13 +170,26 @@ public class ExtractText extends AbstractProcessor {
 
     public static final PropertyDescriptor UNIX_LINES = new PropertyDescriptor.Builder()
             .name("Enable Unix Lines Mode")
-            .description("Indicates that only the '\n' line terminator is recognized int the behavior of '.', '^', and '$'. Can also be specified "
+            .description("Indicates that only the '\n' line terminator is recognized in the behavior of '.', '^', and '$'. Can also be specified "
                     + "via the embeded flag (?d).")
             .required(true)
             .allowableValues("true", "false")
             .defaultValue("false")
             .build();
 
+    /**
+     * Whether capture group 0 (the entire regular expression match) is included as an attribute.
+     * When "false", extraction starts at capture group 1. Defaults to "true".
+     */
+    public static final PropertyDescriptor INCLUDE_CAPTURE_GROUP_ZERO = new PropertyDescriptor.Builder()
+            .name("Include Capture Group 0")
+            .description("Indicates that Capture Group 0 should be included as an attribute. Capture Group 0 represents the entirety of the regular expression match, is typically not used, and "
+                    + "could have considerable length.")
+            .required(true)
+            .allowableValues("true", "false")
+            .defaultValue("true")
+            .build();
+
     public static final Relationship REL_MATCH = new Relationship.Builder()
             .name("matched")
             .description("FlowFiles are routed to this relationship when the Regular Expression is successfully evaluated and the FlowFile is modified as a result")
@@ -212,6 +225,7 @@ public class ExtractText extends AbstractProcessor {
         props.add(UNICODE_CASE);
         props.add(UNICODE_CHARACTER_CLASS);
         props.add(UNIX_LINES);
+        props.add(INCLUDE_CAPTURE_GROUP_ZERO);
         this.properties = Collections.unmodifiableList(props);
     }
 
@@ -297,13 +311,17 @@ public class ExtractText extends AbstractProcessor {
         final Map regexResults = new HashMap<>();
 
         final Map patternMap = compiledPattersMapRef.get();
+
+        // Capture group 0 is the entire match; begin at group 1 when it is excluded
+        final int startGroupIdx = context.getProperty(INCLUDE_CAPTURE_GROUP_ZERO).asBoolean() ? 0 : 1;
+
         for (final Map.Entry entry : patternMap.entrySet()) {
 
             final Matcher matcher = entry.getValue().matcher(contentString);
 
             if (matcher.find()) {
                 final String baseKey = entry.getKey();
-                for (int i = 0; i <= matcher.groupCount(); i++) {
+                for (int i = startGroupIdx; i <= matcher.groupCount(); i++) {
                     final String key = new StringBuilder(baseKey).append(".").append(i).toString();
                     String value = matcher.group(i);
                     if (value.length() > maxCaptureGroupLength) {
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
index fd47cf7ad2..4b7c53cd39 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestExtractText.java
@@ -310,4 +310,47 @@ public class TestExtractText {
         assertEquals(2, relationships.size());
     }
 
+    /** With the default configuration, capture group 0 is emitted under the ".0" suffix. */
+    @Test
+    public void testIncludeZeroCaptureGroupProperty() throws Exception {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        final String attributeKey = "regex.result";
+
+        testRunner.setProperty(attributeKey, "(?s)(.*)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        // Ensure the zero capture group is in the resultant attributes and holds the entire match
+        out.assertAttributeExists(attributeKey + ".0");
+        out.assertAttributeEquals(attributeKey + ".0", SAMPLE_STRING);
+        out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
+    }
+
+    /** With "Include Capture Group 0" set to false, no ".0" attribute is emitted. */
+    @Test
+    public void testIgnoreZeroCaptureGroupProperty() throws Exception {
+        final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
+
+        testRunner.setProperty(ExtractText.INCLUDE_CAPTURE_GROUP_ZERO, "false");
+
+        final String attributeKey = "regex.result";
+
+        testRunner.setProperty(attributeKey, "(?s)(.*)");
+
+        testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
+        testRunner.run();
+
+        testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
+        final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
+
+        // Ensure the zero capture group is not in the resultant attributes while group 1 still is
+        out.assertAttributeNotExists(attributeKey + ".0");
+        out.assertAttributeExists(attributeKey + ".1");
+        out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
+    }
 }