mirror of https://github.com/apache/nifi.git
NIFI-4095 Changed minimum capture group count in ExtractText from 1 to 0.
Added a unit test and removed an obsolete test. Added custom validation that requires every dynamic property's regular expression to contain at least one capture group when "include capture group 0" is false.
This commit is contained in:
parent
5c755c006b
commit
253ea2e73b
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -31,7 +32,6 @@ import java.util.concurrent.LinkedBlockingQueue;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.DynamicProperty;
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
|
@ -43,6 +43,9 @@ import org.apache.nifi.annotation.documentation.Tags;
|
|||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.annotation.lifecycle.OnStopped;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.ValidationContext;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.components.Validator;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
|
@ -259,12 +262,38 @@ public class ExtractText extends AbstractProcessor {
|
|||
return new PropertyDescriptor.Builder()
|
||||
.name(propertyDescriptorName)
|
||||
.expressionLanguageSupported(false)
|
||||
.addValidator(StandardValidators.createRegexValidator(1, 40, true))
|
||||
.addValidator(StandardValidators.createRegexValidator(0, 40, true))
|
||||
.required(false)
|
||||
.dynamic(true)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
|
||||
final List<ValidationResult> problems = new ArrayList<>(super.customValidate(validationContext));
|
||||
|
||||
// If the capture group zero is not going to be included, each dynamic property must have at least one group
|
||||
final boolean includeCaptureGroupZero = validationContext.getProperty(INCLUDE_CAPTURE_GROUP_ZERO).getValue().equalsIgnoreCase("true");
|
||||
getLogger().debug("Include capture group zero is " + includeCaptureGroupZero);
|
||||
if (!includeCaptureGroupZero) {
|
||||
final Validator oneGroupMinimumValidator = StandardValidators.createRegexValidator(1, 40, true);
|
||||
for (Map.Entry<PropertyDescriptor, String> prop : validationContext.getProperties().entrySet()) {
|
||||
PropertyDescriptor pd = prop.getKey();
|
||||
if (pd.isDynamic()) {
|
||||
String value = validationContext.getProperty(pd).getValue();
|
||||
getLogger().debug("Evaluating dynamic property " + pd.getDisplayName() + " (" + pd.getName() + ") with value " + value);
|
||||
ValidationResult result = oneGroupMinimumValidator.validate(pd.getDisplayName(), value, validationContext);
|
||||
getLogger().debug("Validation result: " + result.toString());
|
||||
if (!result.isValid()) {
|
||||
problems.add(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return problems;
|
||||
}
|
||||
|
||||
@OnScheduled
|
||||
public final void onScheduled(final ProcessContext context) throws IOException {
|
||||
final Map<String, Pattern> compiledPatternsMap = new HashMap<>();
|
||||
|
@ -338,7 +367,7 @@ public class ExtractText extends AbstractProcessor {
|
|||
final String baseKey = entry.getKey();
|
||||
int start = j == 0 ? startGroupIdx : 1;
|
||||
for (int i = start; i <= matcher.groupCount(); i++) {
|
||||
final String key = new StringBuilder(baseKey).append(".").append(i+j).toString();
|
||||
final String key = new StringBuilder(baseKey).append(".").append(i + j).toString();
|
||||
String value = matcher.group(i);
|
||||
if (value != null && !value.isEmpty()) {
|
||||
if (value.length() > maxCaptureGroupLength) {
|
||||
|
@ -351,7 +380,7 @@ public class ExtractText extends AbstractProcessor {
|
|||
}
|
||||
}
|
||||
j += matcher.groupCount();
|
||||
if(!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) {
|
||||
if (!context.getProperty(ENABLE_REPEATING_CAPTURE_GROUP).asBoolean()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
|
@ -216,14 +215,6 @@ public class TestExtractText {
|
|||
out.assertAttributeEquals("regex.result7", null);
|
||||
}
|
||||
|
||||
@Test(expected = java.lang.AssertionError.class)
|
||||
public void testNoCaptureGroups() throws UnsupportedEncodingException {
|
||||
final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
|
||||
testRunner.setProperty("regex.result1", ".*");
|
||||
testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
|
||||
testRunner.run();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoFlowFile() throws UnsupportedEncodingException {
|
||||
final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
|
||||
|
@ -422,4 +413,39 @@ public class TestExtractText {
|
|||
out.assertAttributeNotExists(attributeKey + ".0");
|
||||
out.assertAttributeEquals(attributeKey, SAMPLE_STRING);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShouldAllowNoCaptureGroups() throws Exception {
|
||||
// Arrange
|
||||
final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
|
||||
final String attributeKey = "regex.result";
|
||||
testRunner.setProperty(attributeKey, "(?s).*");
|
||||
|
||||
// Act
|
||||
testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
|
||||
testRunner.run();
|
||||
|
||||
// Assert
|
||||
testRunner.assertAllFlowFilesTransferred(ExtractText.REL_MATCH, 1);
|
||||
final MockFlowFile out = testRunner.getFlowFilesForRelationship(ExtractText.REL_MATCH).get(0);
|
||||
|
||||
// There is no global capture group, so only "key.0" exists
|
||||
out.assertAttributeNotExists(attributeKey);
|
||||
out.assertAttributeEquals(attributeKey + ".0", SAMPLE_STRING);
|
||||
}
|
||||
|
||||
@Test(expected = java.lang.AssertionError.class)
|
||||
public void testShouldNotAllowNoCaptureGroupsIfZeroDisabled() throws Exception {
|
||||
// Arrange
|
||||
final TestRunner testRunner = TestRunners.newTestRunner(new ExtractText());
|
||||
testRunner.setProperty(ExtractText.INCLUDE_CAPTURE_GROUP_ZERO, "false");
|
||||
final String attributeKey = "regex.result";
|
||||
testRunner.setProperty(attributeKey, "(?s).*");
|
||||
|
||||
// Act
|
||||
testRunner.enqueue(SAMPLE_STRING.getBytes("UTF-8"));
|
||||
|
||||
// Validation should fail because nothing will match
|
||||
testRunner.run();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue