Merge pull request #15167 from talevy/custom_grok_patterns
add ability to define custom grok patterns within processor config
This commit is contained in:
commit
41a953bf8b
|
@ -245,6 +245,7 @@ TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
|||
| Name | Required | Default | Description
|
||||
| `match_field` | yes | - | The field to use for grok expression parsing
|
||||
| `match_pattern` | yes | - | The grok expression to match and extract named captures with
|
||||
| `pattern_definitions` | no | - | A map of pattern names to pattern definitions, defining custom patterns to be used by the current processor. A custom pattern whose name matches an existing pattern overrides the pre-existing definition.
|
||||
|======
|
||||
|
||||
Here is an example of using the provided patterns to extract out and name structured fields from a string field in
|
||||
|
@ -295,6 +296,28 @@ This pipeline will insert these named captures as new fields within the document
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
An example of a pipeline specifying custom pattern definitions:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"description" : "...",
|
||||
"processors": [
|
||||
{
|
||||
"grok": {
|
||||
"match_field": "message",
|
||||
"match_pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}",
|
||||
"pattern_definitions" : {
|
||||
"FAVORITE_DOG" : "beagle",
|
||||
"RGB" : "RED|GREEN|BLUE"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
==== Geoip processor
|
||||
|
||||
The GeoIP processor adds information about the geographical location of IP addresses, based on data from the MaxMind databases.
|
||||
|
|
|
@ -98,7 +98,10 @@ public final class GrokProcessor implements Processor {
|
|||
public GrokProcessor create(Map<String, Object> config) throws Exception {
|
||||
String matchField = ConfigurationUtils.readStringProperty(config, "field");
|
||||
String matchPattern = ConfigurationUtils.readStringProperty(config, "pattern");
|
||||
Map<String, String> customPatternBank = ConfigurationUtils.readOptionalMap(config, "pattern_definitions");
|
||||
|
||||
Map<String, String> patternBank = new HashMap<>();
|
||||
|
||||
Path patternsDirectory = grokConfigDirectory.resolve("patterns");
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(patternsDirectory)) {
|
||||
for (Path patternFilePath : stream) {
|
||||
|
@ -110,6 +113,10 @@ public final class GrokProcessor implements Processor {
|
|||
}
|
||||
}
|
||||
|
||||
if (customPatternBank != null) {
|
||||
patternBank.putAll(customPatternBank);
|
||||
}
|
||||
|
||||
Grok grok = new Grok(patternBank, matchPattern);
|
||||
return new GrokProcessor(grok, matchField);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.junit.Before;
|
|||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -53,4 +54,16 @@ public class GrokProcessorFactoryTests extends ESTestCase {
|
|||
assertThat(processor.getGrok(), notNullValue());
|
||||
}
|
||||
|
||||
public void testCreateWithCustomPatterns() throws Exception {
|
||||
GrokProcessor.Factory factory = new GrokProcessor.Factory(configDir);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("pattern", "%{MY_PATTERN:name}!");
|
||||
config.put("pattern_definitions", Collections.singletonMap("MY_PATTERN", "foo"));
|
||||
GrokProcessor processor = factory.create(config);
|
||||
assertThat(processor.getMatchField(), equalTo("_field"));
|
||||
assertThat(processor.getGrok(), notNullValue());
|
||||
assertThat(processor.getGrok().match("foo!"), equalTo(true));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,3 +46,100 @@
|
|||
- match: { _source.status: 400 }
|
||||
- match: { _source.msg: "foo" }
|
||||
|
||||
---
|
||||
"Test Grok Pipeline With Custom Pattern":
|
||||
- do:
|
||||
cluster.health:
|
||||
wait_for_status: green
|
||||
|
||||
- do:
|
||||
ingest.put_pipeline:
|
||||
id: "my_pipeline"
|
||||
body: >
|
||||
{
|
||||
"description": "_description",
|
||||
"processors": [
|
||||
{
|
||||
"grok" : {
|
||||
"field" : "field1",
|
||||
"pattern" : "<%{MY_PATTERN:msg}>",
|
||||
"pattern_definitions" : {
|
||||
"MY_PATTERN" : "foo"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
- match: { _id: "my_pipeline" }
|
||||
|
||||
# Simulate a Thread.sleep(), because pipelines are updated in the background
|
||||
- do:
|
||||
catch: request_timeout
|
||||
cluster.health:
|
||||
wait_for_nodes: 99
|
||||
timeout: 2s
|
||||
- match: { "timed_out": true }
|
||||
|
||||
- do:
|
||||
ingest.index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
pipeline_id: "my_pipeline"
|
||||
body: {field1: "<foo>"}
|
||||
|
||||
- do:
|
||||
get:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
- match: { _source.msg: "foo" }
|
||||
|
||||
---
|
||||
"Test Grok Pipeline With Custom Pattern Sharing Same Name As Another":
|
||||
- do:
|
||||
cluster.health:
|
||||
wait_for_status: green
|
||||
|
||||
- do:
|
||||
ingest.put_pipeline:
|
||||
id: "my_pipeline"
|
||||
body: >
|
||||
{
|
||||
"description": "_description",
|
||||
"processors": [
|
||||
{
|
||||
"grok" : {
|
||||
"field" : "field1",
|
||||
"pattern" : "<%{NUMBER:msg}>",
|
||||
"pattern_definitions" : {
|
||||
"NUMBER" : "foo"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
- match: { _id: "my_pipeline" }
|
||||
|
||||
# Simulate a Thread.sleep(), because pipelines are updated in the background
|
||||
- do:
|
||||
catch: request_timeout
|
||||
cluster.health:
|
||||
wait_for_nodes: 99
|
||||
timeout: 2s
|
||||
- match: { "timed_out": true }
|
||||
|
||||
- do:
|
||||
ingest.index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
pipeline_id: "my_pipeline"
|
||||
body: {field1: "<foo>"}
|
||||
|
||||
- do:
|
||||
get:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
- match: { _source.msg: "foo" }
|
||||
|
|
Loading…
Reference in New Issue