Merge pull request #15167 from talevy/custom_grok_patterns

add ability to define custom grok patterns within processor config
This commit is contained in:
Tal Levy 2015-12-03 08:27:35 -08:00
commit 41a953bf8b
4 changed files with 140 additions and 0 deletions

View File

@ -245,6 +245,7 @@ TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
| Name | Required | Default | Description | Name | Required | Default | Description
| `match_field` | yes | - | The field to use for grok expression parsing | `match_field` | yes | - | The field to use for grok expression parsing
| `match_pattern` | yes | - | The grok expression to match and extract named captures with | `match_pattern` | yes | - | The grok expression to match and extract named captures with
| `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition.
|====== |======
Here is an example of using the provided patterns to extract out and name structured fields from a string field in Here is an example of using the provided patterns to extract out and name structured fields from a string field in
@ -295,6 +296,28 @@ This pipeline will insert these named captures as new fields within the document
} }
-------------------------------------------------- --------------------------------------------------
An example of a pipeline specifying custom pattern definitions:
[source,js]
--------------------------------------------------
{
"description" : "...",
"processors": [
{
"grok": {
"match_field": "message",
"match_pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}",
"pattern_definitions" : {
"FAVORITE_DOG" : "beagle",
"RGB" : "RED|GREEN|BLUE"
}
}
}
]
}
--------------------------------------------------
==== Geoip processor ==== Geoip processor
The GeoIP processor adds information about the geographical location of IP addresses, based on data from the Maxmind databases. The GeoIP processor adds information about the geographical location of IP addresses, based on data from the Maxmind databases.

View File

@ -98,7 +98,10 @@ public final class GrokProcessor implements Processor {
public GrokProcessor create(Map<String, Object> config) throws Exception { public GrokProcessor create(Map<String, Object> config) throws Exception {
String matchField = ConfigurationUtils.readStringProperty(config, "field"); String matchField = ConfigurationUtils.readStringProperty(config, "field");
String matchPattern = ConfigurationUtils.readStringProperty(config, "pattern"); String matchPattern = ConfigurationUtils.readStringProperty(config, "pattern");
Map<String, String> customPatternBank = ConfigurationUtils.readOptionalMap(config, "pattern_definitions");
Map<String, String> patternBank = new HashMap<>(); Map<String, String> patternBank = new HashMap<>();
Path patternsDirectory = grokConfigDirectory.resolve("patterns"); Path patternsDirectory = grokConfigDirectory.resolve("patterns");
try (DirectoryStream<Path> stream = Files.newDirectoryStream(patternsDirectory)) { try (DirectoryStream<Path> stream = Files.newDirectoryStream(patternsDirectory)) {
for (Path patternFilePath : stream) { for (Path patternFilePath : stream) {
@ -110,6 +113,10 @@ public final class GrokProcessor implements Processor {
} }
} }
if (customPatternBank != null) {
patternBank.putAll(customPatternBank);
}
Grok grok = new Grok(patternBank, matchPattern); Grok grok = new Grok(patternBank, matchPattern);
return new GrokProcessor(grok, matchField); return new GrokProcessor(grok, matchField);
} }

View File

@ -24,6 +24,7 @@ import org.junit.Before;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -53,4 +54,16 @@ public class GrokProcessorFactoryTests extends ESTestCase {
assertThat(processor.getGrok(), notNullValue()); assertThat(processor.getGrok(), notNullValue());
} }
/**
 * Builds a processor from a config that carries its own "pattern_definitions" entry and
 * checks that the resulting grok instance resolves the custom pattern name.
 */
public void testCreateWithCustomPatterns() throws Exception {
    // Single custom pattern: MY_PATTERN -> foo
    Map<String, String> customDefinitions = Collections.singletonMap("MY_PATTERN", "foo");

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put("field", "_field");
    processorConfig.put("pattern", "%{MY_PATTERN:name}!");
    processorConfig.put("pattern_definitions", customDefinitions);

    GrokProcessor created = new GrokProcessor.Factory(configDir).create(processorConfig);

    assertThat(created.getMatchField(), equalTo("_field"));
    assertThat(created.getGrok(), notNullValue());
    // "foo!" only matches if MY_PATTERN was picked up from the config
    assertThat(created.getGrok().match("foo!"), equalTo(true));
}
} }

View File

@ -46,3 +46,100 @@
- match: { _source.status: 400 } - match: { _source.status: 400 }
- match: { _source.msg: "foo" } - match: { _source.msg: "foo" }
---
# Registers a pipeline whose grok processor defines MY_PATTERN inline via
# "pattern_definitions", indexes a document through it, and checks that the
# named capture "msg" was extracted into the stored source.
"Test Grok Pipeline With Custom Pattern":
- do:
cluster.health:
wait_for_status: green
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"grok" : {
"field" : "field1",
"pattern" : "<%{MY_PATTERN:msg}>",
"pattern_definitions" : {
"MY_PATTERN" : "foo"
}
}
}
]
}
- match: { _id: "my_pipeline" }
# Simulate a Thread.sleep(), because pipelines are updated in the background:
# the health request below is expected to time out after 2s (see the
# request_timeout catch and the timed_out assertion).
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 99
timeout: 2s
- match: { "timed_out": true }
# Index a document through the pipeline; field1 holds the text to be parsed.
- do:
ingest.index:
index: test
type: test
id: 1
pipeline_id: "my_pipeline"
body: {field1: "<foo>"}
# Fetch the document back and verify the capture produced by MY_PATTERN.
- do:
get:
index: test
type: test
id: 1
- match: { _source.msg: "foo" }
---
# Registers a pipeline whose grok processor defines a custom pattern named
# NUMBER as "foo" (presumably shadowing a pre-existing pattern of that name,
# as the test title suggests), then checks that "<foo>" is matched and that
# "msg" is extracted using the custom definition.
"Test Grok Pipeline With Custom Pattern Sharing Same Name As Another":
- do:
cluster.health:
wait_for_status: green
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"grok" : {
"field" : "field1",
"pattern" : "<%{NUMBER:msg}>",
"pattern_definitions" : {
"NUMBER" : "foo"
}
}
}
]
}
- match: { _id: "my_pipeline" }
# Simulate a Thread.sleep(), because pipelines are updated in the background:
# the health request below is expected to time out after 2s (see the
# request_timeout catch and the timed_out assertion).
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 99
timeout: 2s
- match: { "timed_out": true }
# Index a document through the pipeline; field1 holds the text to be parsed.
- do:
ingest.index:
index: test
type: test
id: 1
pipeline_id: "my_pipeline"
body: {field1: "<foo>"}
# Fetch the document back and verify the capture came from the custom NUMBER.
- do:
get:
index: test
type: test
id: 1
- match: { _source.msg: "foo" }