mirror of
https://github.com/apache/nifi.git
synced 2025-02-16 15:06:00 +00:00
NIFI-3635: This closes #1631. Avoid using a static member variable for the 'Grok' object. Code cleanup
Signed-off-by: joewitt <joewitt@apache.org>
This commit is contained in:
parent
35f4f48f37
commit
a5d630672a
@ -43,7 +43,6 @@ import org.apache.nifi.processor.exception.ProcessException;
|
|||||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||||
import org.apache.nifi.processor.io.StreamCallback;
|
import org.apache.nifi.processor.io.StreamCallback;
|
||||||
import org.apache.nifi.processor.util.StandardValidators;
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
import org.apache.nifi.stream.io.BufferedOutputStream;
|
|
||||||
import org.apache.nifi.stream.io.StreamUtils;
|
import org.apache.nifi.stream.io.StreamUtils;
|
||||||
import org.apache.nifi.util.StopWatch;
|
import org.apache.nifi.util.StopWatch;
|
||||||
|
|
||||||
@ -73,20 +72,19 @@ import java.util.concurrent.TimeUnit;
|
|||||||
"if the grok identifier \"timestamp\" is matched, then the value will be added to an attribute named \"grok.timestamp\"")})
|
"if the grok identifier \"timestamp\" is matched, then the value will be added to an attribute named \"grok.timestamp\"")})
|
||||||
public class ExtractGrok extends AbstractProcessor {
|
public class ExtractGrok extends AbstractProcessor {
|
||||||
|
|
||||||
|
|
||||||
public static final String FLOWFILE_ATTRIBUTE = "flowfile-attribute";
|
public static final String FLOWFILE_ATTRIBUTE = "flowfile-attribute";
|
||||||
public static final String FLOWFILE_CONTENT = "flowfile-content";
|
public static final String FLOWFILE_CONTENT = "flowfile-content";
|
||||||
private static final String APPLICATION_JSON = "application/json";
|
private static final String APPLICATION_JSON = "application/json";
|
||||||
|
|
||||||
public static final PropertyDescriptor GROK_EXPRESSION = new PropertyDescriptor
|
public static final PropertyDescriptor GROK_EXPRESSION = new PropertyDescriptor.Builder()
|
||||||
.Builder().name("Grok Expression")
|
.name("Grok Expression")
|
||||||
.description("Grok expression")
|
.description("Grok expression")
|
||||||
.required(true)
|
.required(true)
|
||||||
.addValidator(validateGrokExpression())
|
.addValidator(validateGrokExpression())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
public static final PropertyDescriptor GROK_PATTERN_FILE = new PropertyDescriptor
|
public static final PropertyDescriptor GROK_PATTERN_FILE = new PropertyDescriptor.Builder()
|
||||||
.Builder().name("Grok Pattern file")
|
.name("Grok Pattern file")
|
||||||
.description("Grok Pattern file definition")
|
.description("Grok Pattern file definition")
|
||||||
.required(true)
|
.required(true)
|
||||||
.addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
|
.addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
|
||||||
@ -98,22 +96,21 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
"each of the Grok identifier that is matched in the flowfile will be added as an attribute, " +
|
"each of the Grok identifier that is matched in the flowfile will be added as an attribute, " +
|
||||||
"prefixed with \"grok.\" or written in the flowfile content. Writing to flowfile content " +
|
"prefixed with \"grok.\" or written in the flowfile content. Writing to flowfile content " +
|
||||||
"will overwrite any existing flowfile content.")
|
"will overwrite any existing flowfile content.")
|
||||||
|
|
||||||
.required(true)
|
.required(true)
|
||||||
.allowableValues(FLOWFILE_ATTRIBUTE, FLOWFILE_CONTENT)
|
.allowableValues(FLOWFILE_ATTRIBUTE, FLOWFILE_CONTENT)
|
||||||
.defaultValue(FLOWFILE_ATTRIBUTE)
|
.defaultValue(FLOWFILE_ATTRIBUTE)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor
|
public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()
|
||||||
.Builder().name("Character Set")
|
.name("Character Set")
|
||||||
.description("The Character Set in which the file is encoded")
|
.description("The Character Set in which the file is encoded")
|
||||||
.required(true)
|
.required(true)
|
||||||
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||||
.defaultValue("UTF-8")
|
.defaultValue("UTF-8")
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor
|
public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor.Builder()
|
||||||
.Builder().name("Maximum Buffer Size")
|
.name("Maximum Buffer Size")
|
||||||
.description("Specifies the maximum amount of data to buffer (per file) in order to apply the Grok expressions. Files larger than the specified maximum will not be fully evaluated.")
|
.description("Specifies the maximum amount of data to buffer (per file) in order to apply the Grok expressions. Files larger than the specified maximum will not be fully evaluated.")
|
||||||
.required(true)
|
.required(true)
|
||||||
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
|
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
|
||||||
@ -134,11 +131,9 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
private final static List<PropertyDescriptor> descriptors;
|
private final static List<PropertyDescriptor> descriptors;
|
||||||
private final static Set<Relationship> relationships;
|
private final static Set<Relationship> relationships;
|
||||||
|
|
||||||
|
private volatile Grok grok = new Grok();
|
||||||
private final static Grok grok = Grok.EMPTY;
|
|
||||||
private final BlockingQueue<byte[]> bufferQueue = new LinkedBlockingQueue<>();
|
private final BlockingQueue<byte[]> bufferQueue = new LinkedBlockingQueue<>();
|
||||||
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
final Set<Relationship> _relationships = new HashSet<>();
|
final Set<Relationship> _relationships = new HashSet<>();
|
||||||
_relationships.add(REL_MATCH);
|
_relationships.add(REL_MATCH);
|
||||||
@ -154,7 +149,6 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
descriptors = Collections.unmodifiableList(_descriptors);
|
descriptors = Collections.unmodifiableList(_descriptors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<Relationship> getRelationships() {
|
public Set<Relationship> getRelationships() {
|
||||||
return relationships;
|
return relationships;
|
||||||
@ -165,7 +159,6 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
return descriptors;
|
return descriptors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@OnStopped
|
@OnStopped
|
||||||
public void onStopped() {
|
public void onStopped() {
|
||||||
bufferQueue.clear();
|
bufferQueue.clear();
|
||||||
@ -173,18 +166,15 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
|
|
||||||
@OnScheduled
|
@OnScheduled
|
||||||
public void onScheduled(final ProcessContext context) throws GrokException {
|
public void onScheduled(final ProcessContext context) throws GrokException {
|
||||||
|
|
||||||
|
|
||||||
for (int i = 0; i < context.getMaxConcurrentTasks(); i++) {
|
for (int i = 0; i < context.getMaxConcurrentTasks(); i++) {
|
||||||
final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
|
final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
|
||||||
final byte[] buffer = new byte[maxBufferSize];
|
final byte[] buffer = new byte[maxBufferSize];
|
||||||
bufferQueue.add(buffer);
|
bufferQueue.add(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grok = new Grok();
|
||||||
grok.addPatternFromFile(context.getProperty(GROK_PATTERN_FILE).getValue());
|
grok.addPatternFromFile(context.getProperty(GROK_PATTERN_FILE).getValue());
|
||||||
grok.compile(context.getProperty(GROK_EXPRESSION).getValue());
|
grok.compile(context.getProperty(GROK_EXPRESSION).getValue());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -216,20 +206,18 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
bufferQueue.offer(buffer);
|
bufferQueue.offer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
final Match gm = grok.match(contentString);
|
final Match gm = grok.match(contentString);
|
||||||
gm.captures();
|
gm.captures();
|
||||||
|
|
||||||
|
|
||||||
if (gm.toMap().isEmpty()) {
|
if (gm.toMap().isEmpty()) {
|
||||||
session.transfer(flowFile, REL_NO_MATCH);
|
session.transfer(flowFile, REL_NO_MATCH);
|
||||||
getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[]{flowFile});
|
getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[]{flowFile});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final ObjectMapper objectMapper = new ObjectMapper();
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
switch (context.getProperty(DESTINATION).getValue()) {
|
switch (context.getProperty(DESTINATION).getValue()) {
|
||||||
case FLOWFILE_ATTRIBUTE:
|
case FLOWFILE_ATTRIBUTE:
|
||||||
|
|
||||||
Map<String, String> grokResults = new HashMap<>();
|
Map<String, String> grokResults = new HashMap<>();
|
||||||
for (Map.Entry<String, Object> entry : gm.toMap().entrySet()) {
|
for (Map.Entry<String, Object> entry : gm.toMap().entrySet()) {
|
||||||
if (null != entry.getValue()) {
|
if (null != entry.getValue()) {
|
||||||
@ -244,13 +232,10 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
case FLOWFILE_CONTENT:
|
case FLOWFILE_CONTENT:
|
||||||
|
|
||||||
FlowFile conFlowfile = session.write(flowFile, new StreamCallback() {
|
FlowFile conFlowfile = session.write(flowFile, new StreamCallback() {
|
||||||
@Override
|
@Override
|
||||||
public void process(InputStream in, OutputStream out) throws IOException {
|
public void process(InputStream in, OutputStream out) throws IOException {
|
||||||
try (OutputStream outputStream = new BufferedOutputStream(out)) {
|
out.write(objectMapper.writeValueAsBytes(gm.toMap()));
|
||||||
outputStream.write(objectMapper.writeValueAsBytes(gm.toMap()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
conFlowfile = session.putAttribute(conFlowfile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
|
conFlowfile = session.putAttribute(conFlowfile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
|
||||||
@ -258,15 +243,12 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
session.transfer(conFlowfile, REL_MATCH);
|
session.transfer(conFlowfile, REL_MATCH);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static final Validator validateGrokExpression() {
|
public static final Validator validateGrokExpression() {
|
||||||
return new Validator() {
|
return new Validator() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ValidationResult validate(String subject, String input, ValidationContext context) {
|
public ValidationResult validate(String subject, String input, ValidationContext context) {
|
||||||
|
|
||||||
@ -290,10 +272,8 @@ public class ExtractGrok extends AbstractProcessor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
|
return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -31,9 +31,8 @@ import java.nio.file.Paths;
|
|||||||
public class TestExtractGrok {
|
public class TestExtractGrok {
|
||||||
|
|
||||||
private TestRunner testRunner;
|
private TestRunner testRunner;
|
||||||
final static Path GROK_LOG_INPUT = Paths.get("src/test/resources/TestExtractGrok/apache.log");
|
private final static Path GROK_LOG_INPUT = Paths.get("src/test/resources/TestExtractGrok/apache.log");
|
||||||
final static Path GROK_TEXT_INPUT = Paths.get("src/test/resources/TestExtractGrok/simple_text.log");
|
private final static Path GROK_TEXT_INPUT = Paths.get("src/test/resources/TestExtractGrok/simple_text.log");
|
||||||
|
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void init() {
|
public void init() {
|
||||||
@ -42,8 +41,6 @@ public class TestExtractGrok {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractGrokWithMatchedContent() throws IOException {
|
public void testExtractGrokWithMatchedContent() throws IOException {
|
||||||
|
|
||||||
|
|
||||||
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{COMMONAPACHELOG}");
|
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{COMMONAPACHELOG}");
|
||||||
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
||||||
testRunner.enqueue(GROK_LOG_INPUT);
|
testRunner.enqueue(GROK_LOG_INPUT);
|
||||||
@ -59,13 +56,10 @@ public class TestExtractGrok {
|
|||||||
matched.assertAttributeEquals("grok.timestamp","07/Mar/2004:16:05:49 -0800");
|
matched.assertAttributeEquals("grok.timestamp","07/Mar/2004:16:05:49 -0800");
|
||||||
matched.assertAttributeEquals("grok.request","/twiki/bin/edit/Main/Double_bounce_sender?topicparent=Main.ConfigurationVariables");
|
matched.assertAttributeEquals("grok.request","/twiki/bin/edit/Main/Double_bounce_sender?topicparent=Main.ConfigurationVariables");
|
||||||
matched.assertAttributeEquals("grok.httpversion","1.1");
|
matched.assertAttributeEquals("grok.httpversion","1.1");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractGrokWithUnMatchedContent() throws IOException {
|
public void testExtractGrokWithUnMatchedContent() throws IOException {
|
||||||
|
|
||||||
|
|
||||||
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{ADDRESS}");
|
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{ADDRESS}");
|
||||||
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
||||||
testRunner.enqueue(GROK_TEXT_INPUT);
|
testRunner.enqueue(GROK_TEXT_INPUT);
|
||||||
@ -73,29 +67,23 @@ public class TestExtractGrok {
|
|||||||
testRunner.assertAllFlowFilesTransferred(ExtractGrok.REL_NO_MATCH);
|
testRunner.assertAllFlowFilesTransferred(ExtractGrok.REL_NO_MATCH);
|
||||||
final MockFlowFile notMatched = testRunner.getFlowFilesForRelationship(ExtractGrok.REL_NO_MATCH).get(0);
|
final MockFlowFile notMatched = testRunner.getFlowFilesForRelationship(ExtractGrok.REL_NO_MATCH).get(0);
|
||||||
notMatched.assertContentEquals(GROK_TEXT_INPUT);
|
notMatched.assertContentEquals(GROK_TEXT_INPUT);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractGrokWithNotFoundPatternFile() throws IOException {
|
public void testExtractGrokWithNotFoundPatternFile() throws IOException {
|
||||||
|
|
||||||
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{COMMONAPACHELOG}");
|
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{COMMONAPACHELOG}");
|
||||||
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/toto_file");
|
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/toto_file");
|
||||||
testRunner.enqueue(GROK_LOG_INPUT);
|
testRunner.enqueue(GROK_LOG_INPUT);
|
||||||
testRunner.assertNotValid();
|
testRunner.assertNotValid();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractGrokWithBadGrokExpression() throws IOException {
|
public void testExtractGrokWithBadGrokExpression() throws IOException {
|
||||||
|
|
||||||
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{TOTO");
|
testRunner.setProperty(ExtractGrok.GROK_EXPRESSION, "%{TOTO");
|
||||||
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
testRunner.setProperty(ExtractGrok.GROK_PATTERN_FILE, "src/test/resources/TestExtractGrok/patterns");
|
||||||
testRunner.enqueue(GROK_LOG_INPUT);
|
testRunner.enqueue(GROK_LOG_INPUT);
|
||||||
testRunner.assertNotValid();
|
testRunner.assertNotValid();
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user