NIFI-1632: Fixed NPE that occurs if a capturing group is optional and not matched; updated docs to illustrate how this is handled

Signed-off-by: joewitt <joewitt@apache.org>
This commit is contained in:
Mark Payne 2016-03-16 11:33:13 -04:00 committed by joewitt
parent 0f61079300
commit 68cfc8c612
2 changed files with 34 additions and 7 deletions

View File

@ -67,7 +67,11 @@ import org.apache.nifi.stream.io.StreamUtils;
+ "the name of the property maps to the Attribute Name into which the result will be placed. "
+ "The first capture group, if any found, will be placed into that attribute name."
+ "But all capture groups, including the matching string sequence itself will also be "
+ "provided at that attribute name with an index value provided."
+ "provided at that attribute name with an index value provided, with the exception of a capturing group "
+ "that is optional and does not match - for example, given the attribute name \"regex\" and expression "
+ "\"abc(def)?(g)\" we would add an attribute \"regex.1\" with a value of \"def\" if the \"def\" matched. If "
+ "the \"def\" did not match, no attribute named \"regex.1\" would be added but an attribute named \"regex.2\" "
+ "with a value of \"g\" will be added regardless."
+ "The value of the property must be a valid Regular Expressions with one or more capturing groups. "
+ "If the Regular Expression matches more than once, only the first match will be used. "
+ "If any provided Regular Expression matches, the FlowFile(s) will be routed to 'matched'. "
@ -322,12 +326,14 @@ public class ExtractText extends AbstractProcessor {
for (int i = startGroupIdx; i <= matcher.groupCount(); i++) {
final String key = new StringBuilder(baseKey).append(".").append(i).toString();
String value = matcher.group(i);
if (value.length() > maxCaptureGroupLength) {
value = value.substring(0, maxCaptureGroupLength);
}
regexResults.put(key, value);
if (i == 1) {
regexResults.put(baseKey, value);
if (value != null) {
if (value.length() > maxCaptureGroupLength) {
value = value.substring(0, maxCaptureGroupLength);
}
regexResults.put(key, value);
if (i == 1) {
regexResults.put(baseKey, value);
}
}
}
}

View File

@ -65,6 +65,27 @@ public class TestExtractText {
out.assertAttributeEquals("regex.result7", null);
}
/**
 * Verifies handling of an optional capturing group using the expression {@code abc(def)?(g)}:
 * when the optional group does not match, no indexed attribute is written for it, while the
 * attributes for the groups that did match are still written.
 */
@Test
public void testWithUnmatchedOptionalCapturingGroup() {
    final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
    testRunner.setProperty("regex", "abc(def)?(g)");

    // Scenario 1: the optional "(def)" group is absent from the content, so "regex.1"
    // must not be created, but "regex.2" must still carry the value of "(g)".
    testRunner.enqueue("abcg");
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
    final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
    out.assertAttributeNotExists("regex.1");
    out.assertAttributeEquals("regex.2", "g");

    // Reset so the second scenario only sees its own FlowFile.
    testRunner.clearTransferState();

    // Scenario 2: the optional group is present, so both indexed attributes are expected.
    testRunner.enqueue("abcdefg");
    testRunner.run();
    // Assert routing explicitly so a regression fails with a clear message rather than
    // an IndexOutOfBoundsException from get(0) below.
    testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
    final MockFlowFile out2 = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
    out2.assertAttributeEquals("regex.1", "def");
    out2.assertAttributeEquals("regex.2", "g");
}
@Test
public void testProcessorWithDotall() throws Exception {