Merge pull request #15167 from talevy/custom_grok_patterns
add ability to define custom grok patterns within processor config
Commit: 41a953bf8b
@@ -245,6 +245,7 @@ TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
| Name | Required | Default | Description
| `match_field` | yes | - | The field to use for grok expression parsing
| `match_pattern` | yes | - | The grok expression to match and extract named captures with
| `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
|======

Here is an example of using the provided patterns to extract out and name structured fields from a string field in
@@ -295,6 +296,28 @@ This pipeline will insert these named captures as new fields within the document
}
--------------------------------------------------

An example of a pipeline specifying custom pattern definitions:

[source,js]
--------------------------------------------------
{
  "description" : "...",
  "processors": [
    {
      "grok": {
        "match_field": "message",
        "match_pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}",
        "pattern_definitions" : {
          "FAVORITE_DOG" : "beagle",
          "RGB" : "RED|GREEN|BLUE"
        }
      }
    }
  ]
}
--------------------------------------------------

==== Geoip processor

The GeoIP processor adds information about the geographical location of IP addresses, based on data from the Maxmind databases.

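The options table above notes that patterns matching existing names override the pre-existing definition. Below is a minimal, self-contained sketch of that merge semantic, mirroring the `patternBank.putAll(customPatternBank)` call in the factory change that follows; the built-in definitions shown here are illustrative stand-ins, not the real pattern files.

[source,java]
--------------------------------------------------
import java.util.HashMap;
import java.util.Map;

public class PatternOverrideSketch {
    public static void main(String[] args) {
        // stand-ins for definitions loaded from the patterns directory
        Map<String, String> patternBank = new HashMap<>();
        patternBank.put("NUMBER", "(?:[+-]?(?:[0-9]+))");
        patternBank.put("WORD", "\\b\\w+\\b");

        // user-supplied pattern_definitions from the processor config
        Map<String, String> customPatternBank = new HashMap<>();
        customPatternBank.put("NUMBER", "foo");        // reuses an existing name
        customPatternBank.put("MY_PATTERN", "bar");    // introduces a new name

        // same merge the factory performs: custom definitions win on name clashes
        patternBank.putAll(customPatternBank);

        System.out.println(patternBank.get("NUMBER"));     // prints: foo (overridden)
        System.out.println(patternBank.get("MY_PATTERN")); // prints: bar (added)
        System.out.println(patternBank.get("WORD"));       // prints: \b\w+\b (untouched)
    }
}
--------------------------------------------------

Because `Map.putAll` replaces the values of existing keys, a user-supplied `NUMBER` definition wins over the built-in one, which is exactly the behavior exercised by the second REST test at the end of this diff.
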
@@ -98,7 +98,10 @@ public final class GrokProcessor implements Processor {
        public GrokProcessor create(Map<String, Object> config) throws Exception {
            String matchField = ConfigurationUtils.readStringProperty(config, "field");
            String matchPattern = ConfigurationUtils.readStringProperty(config, "pattern");
            Map<String, String> customPatternBank = ConfigurationUtils.readOptionalMap(config, "pattern_definitions");

            Map<String, String> patternBank = new HashMap<>();

            Path patternsDirectory = grokConfigDirectory.resolve("patterns");
            try (DirectoryStream<Path> stream = Files.newDirectoryStream(patternsDirectory)) {
                for (Path patternFilePath : stream) {
@@ -110,6 +113,10 @@ public final class GrokProcessor implements Processor {
                }
            }

            if (customPatternBank != null) {
                patternBank.putAll(customPatternBank);
            }

            Grok grok = new Grok(patternBank, matchPattern);
            return new GrokProcessor(grok, matchField);
        }

@@ -24,6 +24,7 @@ import org.junit.Before;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

@@ -53,4 +54,16 @@ public class GrokProcessorFactoryTests extends ESTestCase {
        assertThat(processor.getGrok(), notNullValue());
    }

    public void testCreateWithCustomPatterns() throws Exception {
        GrokProcessor.Factory factory = new GrokProcessor.Factory(configDir);

        Map<String, Object> config = new HashMap<>();
        config.put("field", "_field");
        config.put("pattern", "%{MY_PATTERN:name}!");
        config.put("pattern_definitions", Collections.singletonMap("MY_PATTERN", "foo"));
        GrokProcessor processor = factory.create(config);
        assertThat(processor.getMatchField(), equalTo("_field"));
        assertThat(processor.getGrok(), notNullValue());
        assertThat(processor.getGrok().match("foo!"), equalTo(true));
    }
}

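To connect the documentation example to the matcher itself, here is a short sketch using only the `Grok(Map, String)` constructor and `match(String)` call that appear in the factory and test above. The `Grok` class's package and import are not shown in this diff, so they are omitted; this assumes the class is available within the ingest-grok module.

[source,java]
--------------------------------------------------
// Sketch only: assumes the ingest-grok module's Grok class is on the classpath
// (its import is not shown in this diff).
import java.util.HashMap;
import java.util.Map;

public class GrokDefinitionsSketch {
    static boolean matchesDocsExample() {
        Map<String, String> patternBank = new HashMap<>();
        // the two custom definitions from the documentation example above
        patternBank.put("FAVORITE_DOG", "beagle");
        patternBank.put("RGB", "RED|GREEN|BLUE");

        // same constructor and match(String) usage as GrokProcessor.Factory and the test
        Grok grok = new Grok(patternBank, "my %{FAVORITE_DOG:dog} is colored %{RGB:color}");
        return grok.match("my beagle is colored BLUE"); // expected to be true
    }
}
--------------------------------------------------

With `FAVORITE_DOG` and `RGB` supplied only through `pattern_definitions`, the match pattern from the docs example resolves and the sample line should match.
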
@@ -46,3 +46,100 @@
  - match: { _source.status: 400 }
  - match: { _source.msg: "foo" }

---
"Test Grok Pipeline With Custom Pattern":
  - do:
      cluster.health:
        wait_for_status: green

  - do:
      ingest.put_pipeline:
        id: "my_pipeline"
        body: >
          {
            "description": "_description",
            "processors": [
              {
                "grok" : {
                  "field" : "field1",
                  "pattern" : "<%{MY_PATTERN:msg}>",
                  "pattern_definitions" : {
                    "MY_PATTERN" : "foo"
                  }
                }
              }
            ]
          }
  - match: { _id: "my_pipeline" }

  # Simulate a Thread.sleep(), because pipelines are updated in the background
  - do:
      catch: request_timeout
      cluster.health:
        wait_for_nodes: 99
        timeout: 2s
  - match: { "timed_out": true }

  - do:
      ingest.index:
        index: test
        type: test
        id: 1
        pipeline_id: "my_pipeline"
        body: {field1: "<foo>"}

  - do:
      get:
        index: test
        type: test
        id: 1
  - match: { _source.msg: "foo" }

---
"Test Grok Pipeline With Custom Pattern Sharing Same Name As Another":
  - do:
      cluster.health:
        wait_for_status: green

  - do:
      ingest.put_pipeline:
        id: "my_pipeline"
        body: >
          {
            "description": "_description",
            "processors": [
              {
                "grok" : {
                  "field" : "field1",
                  "pattern" : "<%{NUMBER:msg}>",
                  "pattern_definitions" : {
                    "NUMBER" : "foo"
                  }
                }
              }
            ]
          }
  - match: { _id: "my_pipeline" }

  # Simulate a Thread.sleep(), because pipelines are updated in the background
  - do:
      catch: request_timeout
      cluster.health:
        wait_for_nodes: 99
        timeout: 2s
  - match: { "timed_out": true }

  - do:
      ingest.index:
        index: test
        type: test
        id: 1
        pipeline_id: "my_pipeline"
        body: {field1: "<foo>"}

  - do:
      get:
        index: test
        type: test
        id: 1
  - match: { _source.msg: "foo" }