NIFI-4095 Changed minimum capture group count in ExtractText from 1 to 0.

Added unit test and removed obsolete test.
Added custom validation that requires at least one capture group in each dynamic property when "include capture group 0" is false.
This commit is contained in:
Andy LoPresto 2017-06-20 16:23:19 -04:00 committed by Pierre Villard
parent 5c755c006b
commit 253ea2e73b
2 changed files with 86 additions and 31 deletions

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -31,7 +32,6 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
@ -43,6 +43,9 @@ import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
@ -259,12 +262,38 @@ public class ExtractText extends AbstractProcessor {
return new PropertyDescriptor.Builder()
.name(propertyDescriptorName)
.expressionLanguageSupported(false)
.addValidator(StandardValidators.createRegexValidator(1, 40, true))
.addValidator(StandardValidators.createRegexValidator(0, 40, true))
.required(false)
.dynamic(true)
.build();
}
/**
 * Adds cross-property validation on top of the checks performed by the superclass.
 *
 * <p>When the "include capture group zero" property is not {@code true}, every dynamic
 * property is re-validated with a regex validator built with a minimum capture group
 * count of one, and each failing result is added to the returned collection.</p>
 *
 * @param validationContext the context supplying the current property values
 * @return the superclass validation results plus one failing result per dynamic property
 *         that does not satisfy the one-group-minimum validator
 */
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
    final List<ValidationResult> problems = new ArrayList<>(super.customValidate(validationContext));

    // Boolean.parseBoolean is a case-insensitive match on "true" that also tolerates null,
    // so an unset value is treated as "not included" instead of throwing a NullPointerException.
    final boolean includeCaptureGroupZero =
            Boolean.parseBoolean(validationContext.getProperty(INCLUDE_CAPTURE_GROUP_ZERO).getValue());
    getLogger().debug("Include capture group zero is " + includeCaptureGroupZero);

    // Group zero is always available from a match, so no extra constraint applies in that case.
    if (includeCaptureGroupZero) {
        return problems;
    }

    // If the capture group zero is not going to be included, each dynamic property must have at least one group
    final Validator oneGroupMinimumValidator = StandardValidators.createRegexValidator(1, 40, true);
    for (final Map.Entry<PropertyDescriptor, String> prop : validationContext.getProperties().entrySet()) {
        final PropertyDescriptor pd = prop.getKey();
        if (!pd.isDynamic()) {
            continue;
        }

        // Read the value through the context rather than from the map entry, as the original code does.
        final String value = validationContext.getProperty(pd).getValue();
        getLogger().debug("Evaluating dynamic property " + pd.getDisplayName() + " (" + pd.getName() + ") with value " + value);

        final ValidationResult result = oneGroupMinimumValidator.validate(pd.getDisplayName(), value, validationContext);
        getLogger().debug("Validation result: " + result.toString());
        if (!result.isValid()) {
            problems.add(result);
        }
    }

    return problems;
}
@OnScheduled
public final void onScheduled(final ProcessContext context) throws IOException {
final Map<String, Pattern> compiledPatternsMap = new HashMap<>();
@ -338,7 +367,7 @@ public class ExtractText extends AbstractProcessor {
final String baseKey = entry.getKey();
int start = j == 0 ? startGroupIdx : 1;
for (int i = start; i <= matcher.groupCount(); i++) {
final String key = new StringBuilder(baseKey).append(".").append(i+j).toString();
final String key = new StringBuilder(baseKey).append(".").append(i + j).toString();
String value = matcher.group(i);
if (value != null && !value.isEmpty()) {
if (value.length() > maxCaptureGroupLength) {
@ -351,7 +380,7 @@ public class ExtractText extends AbstractProcessor {
}
}
j += matcher.groupCount();
if(!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) {
if (!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) {
break;
}
}

View File

@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue;
import java.io.UnsupportedEncodingException;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
@ -216,14 +215,6 @@ public class TestExtractText {
out.assertAttributeEquals("regex.result7", null);
}
/**
 * Configures a single dynamic property whose regex contains no capture groups and
 * expects running the processor to end in an {@link java.lang.AssertionError}.
 */
@Test(expected = java.lang.AssertionError.class)
public void testNoCaptureGroups() throws UnsupportedEncodingException {
    final byte[] content = SAMPLE_STRING.getBytes("UTF-8");

    final TestRunner runner = TestRunners.newTestRunner(new ExtractText());
    runner.setProperty("regex.result1", ".*");

    runner.enqueue(content);
    runner.run();
}
@Test
public void testNoFlowFile() throws UnsupportedEncodingException {
final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
@ -422,4 +413,39 @@ public class TestExtractText {
out.assertAttributeNotExists(attributeKey + ".0");
out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
}
/**
 * A regex without any capture group is accepted when the processor is left at its
 * default configuration; the whole match is exposed under the ".0" suffixed attribute only.
 */
@Test
public void testShouldAllowNoCaptureGroups() throws Exception {
    // Arrange: one dynamic property whose pattern matches everything but declares no groups
    final String propertyName = "regex.result";
    final byte[] content = SAMPLE_STRING.getBytes("UTF-8");
    final TestRunner runner = TestRunners.newTestRunner(new ExtractText());
    runner.setProperty(propertyName, "(?s).*");

    // Act
    runner.enqueue(content);
    runner.run();

    // Assert: the single flow file is routed to "match"
    runner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
    final MockFlowFile matched = runner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);

    // Only the group-zero attribute is written; the bare property name is not
    final String groupZeroKey = propertyName + ".0";
    matched.assertAttributeNotExists(propertyName);
    matched.assertAttributeEquals(groupZeroKey, SAMPLE_STRING);
}
/**
 * With capture group zero excluded, a regex that declares no capture groups must be
 * rejected; the run is expected to end in an {@link java.lang.AssertionError}.
 */
@Test(expected = java.lang.AssertionError.class)
public void testShouldNotAllowNoCaptureGroupsIfZeroDisabled() throws Exception {
    // Arrange: exclude group zero, then configure a pattern that has no groups of its own
    final String propertyName = "regex.result";
    final byte[] content = SAMPLE_STRING.getBytes("UTF-8");
    final TestRunner runner = TestRunners.newTestRunner(new ExtractText());
    runner.setProperty(ExtractText.INCLUDE_CAPTURE_GROUP_ZERO, "false");
    runner.setProperty(propertyName, "(?s).*");

    // Act
    runner.enqueue(content);

    // The processor configuration is invalid (no capture group available), so run() should fail
    runner.run();
}
}