diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc index 3dcf719f382..54a10f766fd 100644 --- a/docs/reference/ingest/ingest-node.asciidoc +++ b/docs/reference/ingest/ingest-node.asciidoc @@ -1038,8 +1038,9 @@ Grok expression. |====== | Name | Required | Default | Description | `field` | yes | - | The field to use for grok expression parsing -| `pattern` | yes | - | The grok expression to match and extract named captures with +| `patterns` | yes | - | An ordered list of grok expressions to match and extract named captures with. Returns on the first expression in the list that matches. | `pattern_definitions` | no | - | A map of pattern-name and pattern tuples defining custom patterns to be used by the current processor. Patterns matching existing names will override the pre-existing definition. +| `trace_match` | no | false | When true, `_ingest._grok_match_index` will be inserted into your matched document's metadata with the index into `patterns` of the pattern that matched. 
|====== Here is an example of using the provided patterns to extract out and name structured fields from a string field in @@ -1069,7 +1070,7 @@ Here is an example pipeline for processing the above document by using Grok: { "grok": { "field": "message", - "pattern": "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" + "patterns": ["%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"] } } ] @@ -1107,7 +1108,7 @@ Here is an example of a pipeline specifying custom pattern definitions: { "grok": { "field": "message", - "pattern": "my %{FAVORITE_DOG:dog} is colored %{RGB:color}" + "patterns": ["my %{FAVORITE_DOG:dog} is colored %{RGB:color}"], "pattern_definitions" : { "FAVORITE_DOG" : "beagle", "RGB" : "RED|GREEN|BLUE" diff --git a/modules/ingest-grok/src/main/java/org/elasticsearch/ingest/grok/GrokProcessor.java b/modules/ingest-grok/src/main/java/org/elasticsearch/ingest/grok/GrokProcessor.java index 9237821baba..6a10c8db058 100644 --- a/modules/ingest-grok/src/main/java/org/elasticsearch/ingest/grok/GrokProcessor.java +++ b/modules/ingest-grok/src/main/java/org/elasticsearch/ingest/grok/GrokProcessor.java @@ -25,31 +25,50 @@ import org.elasticsearch.ingest.core.ConfigurationUtils; import org.elasticsearch.ingest.core.IngestDocument; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import static org.elasticsearch.ingest.core.ConfigurationUtils.newConfigurationException; public final class GrokProcessor extends AbstractProcessor { public static final String TYPE = "grok"; + private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index"; private final String matchField; private final Grok grok; + private final boolean traceMatch; - public GrokProcessor(String tag, Grok grok, String matchField) { + public GrokProcessor(String tag, Map patternBank, List matchPatterns, String matchField) { + this(tag, patternBank, matchPatterns, 
matchField, false); + } + + public GrokProcessor(String tag, Map patternBank, List matchPatterns, String matchField, boolean traceMatch) { super(tag); this.matchField = matchField; - this.grok = grok; + this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch)); + this.traceMatch = traceMatch; } @Override public void execute(IngestDocument ingestDocument) throws Exception { String fieldValue = ingestDocument.getFieldValue(matchField, String.class); Map matches = grok.captures(fieldValue); - if (matches != null) { - matches.forEach((k, v) -> ingestDocument.setFieldValue(k, v)); - } else { - throw new IllegalArgumentException("Grok expression does not match field value: [" + fieldValue + "]"); + if (matches == null) { + throw new IllegalArgumentException("Provided Grok expressions do not match field value: [" + fieldValue + "]"); + } + + matches.entrySet().stream() + .filter((e) -> Objects.nonNull(e.getValue())) + .forEach((e) -> ingestDocument.setFieldValue(e.getKey(), e.getValue())); + + if (traceMatch) { + @SuppressWarnings("unchecked") + HashMap matchMap = (HashMap) ingestDocument.getFieldValue(PATTERN_MATCH_KEY, Object.class); + matchMap.keySet().stream().findFirst().ifPresent((index) -> { + ingestDocument.setFieldValue(PATTERN_MATCH_KEY, index); + }); } } @@ -58,12 +77,41 @@ public final class GrokProcessor extends AbstractProcessor { return TYPE; } + public Grok getGrok() { + return grok; + } + String getMatchField() { return matchField; } - Grok getGrok() { - return grok; + static String combinePatterns(List patterns, boolean traceMatch) { + String combinedPattern; + if (patterns.size() > 1) { + if (traceMatch) { + combinedPattern = ""; + for (int i = 0; i < patterns.size(); i++) { + String valueWrap = "(?<" + PATTERN_MATCH_KEY + "." 
+ i + ">" + patterns.get(i) + ")"; + if (combinedPattern.equals("")) { + combinedPattern = valueWrap; + } else { + combinedPattern = combinedPattern + "|" + valueWrap; + } + } + } else { + combinedPattern = patterns.stream().reduce("", (prefix, value) -> { + if (prefix.equals("")) { + return "(?:" + value + ")"; + } else { + return prefix + "|" + "(?:" + value + ")"; + } + }); + } + } else { + combinedPattern = patterns.get(0); + } + + return combinedPattern; } public final static class Factory extends AbstractProcessorFactory { @@ -77,22 +125,25 @@ public final class GrokProcessor extends AbstractProcessor { @Override public GrokProcessor doCreate(String processorTag, Map config) throws Exception { String matchField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); - String matchPattern = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "pattern"); + List matchPatterns = ConfigurationUtils.readList(TYPE, processorTag, config, "patterns"); + boolean traceMatch = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "trace_match", false); + + if (matchPatterns.isEmpty()) { + throw newConfigurationException(TYPE, processorTag, "patterns", "List of patterns must not be empty"); + } Map customPatternBank = ConfigurationUtils.readOptionalMap(TYPE, processorTag, config, "pattern_definitions"); Map patternBank = new HashMap<>(builtinPatterns); if (customPatternBank != null) { patternBank.putAll(customPatternBank); } - Grok grok; try { - grok = new Grok(patternBank, matchPattern); + return new GrokProcessor(processorTag, patternBank, matchPatterns, matchField, traceMatch); } catch (Exception e) { - throw newConfigurationException(TYPE, processorTag, "pattern", "Invalid regex pattern. " + e.getMessage()); + throw newConfigurationException(TYPE, processorTag, "patterns", + "Invalid regex pattern found in: " + matchPatterns + ". 
" + e.getMessage()); } - return new GrokProcessor(processorTag, grok, matchField); + } - } - } diff --git a/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorFactoryTests.java b/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorFactoryTests.java index 3880d389c52..ec3b908aac0 100644 --- a/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorFactoryTests.java +++ b/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorFactoryTests.java @@ -37,7 +37,7 @@ public class GrokProcessorFactoryTests extends ESTestCase { Map config = new HashMap<>(); config.put("field", "_field"); - config.put("pattern", "(?\\w+)"); + config.put("patterns", Collections.singletonList("(?\\w+)")); String processorTag = randomAsciiOfLength(10); config.put(AbstractProcessorFactory.TAG_KEY, processorTag); GrokProcessor processor = factory.create(config); @@ -49,27 +49,26 @@ public class GrokProcessorFactoryTests extends ESTestCase { public void testBuildMissingField() throws Exception { GrokProcessor.Factory factory = new GrokProcessor.Factory(Collections.emptyMap()); Map config = new HashMap<>(); - config.put("pattern", "(?\\w+)"); - try { - factory.create(config); - fail("should fail"); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[field] required property is missing")); - - } + config.put("patterns", Collections.singletonList("(?\\w+)")); + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config)); + assertThat(e.getMessage(), equalTo("[field] required property is missing")); } - public void testBuildMissingPattern() throws Exception { + public void testBuildMissingPatterns() throws Exception { GrokProcessor.Factory factory = new GrokProcessor.Factory(Collections.emptyMap()); Map config = new HashMap<>(); config.put("field", "foo"); - try { - factory.create(config); - fail("should fail"); - } catch 
(ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[pattern] required property is missing")); - } + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config)); + assertThat(e.getMessage(), equalTo("[patterns] required property is missing")); + } + public void testBuildEmptyPatternsList() throws Exception { + GrokProcessor.Factory factory = new GrokProcessor.Factory(Collections.emptyMap()); + Map config = new HashMap<>(); + config.put("field", "foo"); + config.put("patterns", Collections.emptyList()); + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config)); + assertThat(e.getMessage(), equalTo("[patterns] List of patterns must not be empty")); } public void testCreateWithCustomPatterns() throws Exception { @@ -77,7 +76,7 @@ public class GrokProcessorFactoryTests extends ESTestCase { Map config = new HashMap<>(); config.put("field", "_field"); - config.put("pattern", "%{MY_PATTERN:name}!"); + config.put("patterns", Collections.singletonList("%{MY_PATTERN:name}!")); config.put("pattern_definitions", Collections.singletonMap("MY_PATTERN", "foo")); GrokProcessor processor = factory.create(config); assertThat(processor.getMatchField(), equalTo("_field")); @@ -89,28 +88,19 @@ public class GrokProcessorFactoryTests extends ESTestCase { GrokProcessor.Factory factory = new GrokProcessor.Factory(Collections.emptyMap()); Map config = new HashMap<>(); config.put("field", "_field"); - config.put("pattern", "["); - try { - factory.create(config); - fail("should fail"); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[pattern] Invalid regex pattern. 
premature end of char-class")); - } - + config.put("patterns", Collections.singletonList("[")); + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config)); + assertThat(e.getMessage(), equalTo("[patterns] Invalid regex pattern found in: [[]. premature end of char-class")); } public void testCreateWithInvalidPatternDefinition() throws Exception { GrokProcessor.Factory factory = new GrokProcessor.Factory(Collections.emptyMap()); Map config = new HashMap<>(); config.put("field", "_field"); - config.put("pattern", "%{MY_PATTERN:name}!"); + config.put("patterns", Collections.singletonList("%{MY_PATTERN:name}!")); config.put("pattern_definitions", Collections.singletonMap("MY_PATTERN", "[")); - try { - factory.create(config); - fail("should fail"); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[pattern] Invalid regex pattern. premature end of char-class")); - } - + ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config)); + assertThat(e.getMessage(), + equalTo("[patterns] Invalid regex pattern found in: [%{MY_PATTERN:name}!]. 
premature end of char-class")); } } diff --git a/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorTests.java b/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorTests.java index 840cf954022..82354a943ae 100644 --- a/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorTests.java +++ b/modules/ingest-grok/src/test/java/org/elasticsearch/ingest/grok/GrokProcessorTests.java @@ -21,14 +21,15 @@ package org.elasticsearch.ingest.grok; import org.elasticsearch.ingest.RandomDocumentPicks; import org.elasticsearch.ingest.core.IngestDocument; -import org.elasticsearch.ingest.grok.Grok; -import org.elasticsearch.ingest.grok.GrokProcessor; import org.elasticsearch.test.ESTestCase; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Map; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; public class GrokProcessorTests extends ESTestCase { @@ -37,8 +38,8 @@ public class GrokProcessorTests extends ESTestCase { String fieldName = RandomDocumentPicks.randomFieldName(random()); IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); doc.setFieldValue(fieldName, "1"); - Grok grok = new Grok(Collections.singletonMap("ONE", "1"), "%{ONE:one}"); - GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), grok, fieldName); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), Collections.singletonMap("ONE", "1"), + Collections.singletonList("%{ONE:one}"), fieldName); processor.execute(doc); assertThat(doc.getFieldValue("one", String.class), equalTo("1")); } @@ -47,14 +48,10 @@ public class GrokProcessorTests extends ESTestCase { String fieldName = RandomDocumentPicks.randomFieldName(random()); IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); doc.setFieldValue(fieldName, "23"); - Grok grok = new 
Grok(Collections.singletonMap("ONE", "1"), "%{ONE:one}"); - GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), grok, fieldName); - try { - processor.execute(doc); - fail(); - } catch (Exception e) { - assertThat(e.getMessage(), equalTo("Grok expression does not match field value: [23]")); - } + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), Collections.singletonMap("ONE", "1"), + Collections.singletonList("%{ONE:one}"), fieldName); + Exception e = expectThrows(Exception.class, () -> processor.execute(doc)); + assertThat(e.getMessage(), equalTo("Provided Grok expressions do not match field value: [23]")); } public void testMatchWithoutCaptures() throws Exception { @@ -62,8 +59,8 @@ public class GrokProcessorTests extends ESTestCase { IngestDocument originalDoc = new IngestDocument(new HashMap<>(), new HashMap<>()); originalDoc.setFieldValue(fieldName, fieldName); IngestDocument doc = new IngestDocument(originalDoc); - Grok grok = new Grok(Collections.emptyMap(), fieldName); - GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), grok, fieldName); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), Collections.emptyMap(), + Collections.singletonList(fieldName), fieldName); processor.execute(doc); assertThat(doc, equalTo(originalDoc)); } @@ -72,26 +69,67 @@ public class GrokProcessorTests extends ESTestCase { String fieldName = RandomDocumentPicks.randomFieldName(random()); IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); doc.setFieldValue(fieldName, 1); - Grok grok = new Grok(Collections.singletonMap("ONE", "1"), "%{ONE:one}"); - GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), grok, fieldName); - try { - processor.execute(doc); - fail(); - } catch (Exception e) { - assertThat(e.getMessage(), equalTo("field [" + fieldName + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); - } + GrokProcessor processor = new 
GrokProcessor(randomAsciiOfLength(10), Collections.singletonMap("ONE", "1"), + Collections.singletonList("%{ONE:one}"), fieldName); + Exception e = expectThrows(Exception.class, () -> processor.execute(doc)); + assertThat(e.getMessage(), equalTo("field [" + fieldName + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); } public void testMissingField() { String fieldName = "foo.bar"; IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); - Grok grok = new Grok(Collections.singletonMap("ONE", "1"), "%{ONE:one}"); - GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), grok, fieldName); - try { - processor.execute(doc); - fail(); - } catch (Exception e) { - assertThat(e.getMessage(), equalTo("field [foo] not present as part of path [foo.bar]")); - } + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), Collections.singletonMap("ONE", "1"), + Collections.singletonList("%{ONE:one}"), fieldName); + Exception e = expectThrows(Exception.class, () -> processor.execute(doc)); + assertThat(e.getMessage(), equalTo("field [foo] not present as part of path [foo.bar]")); + } + + public void testMultiplePatternsWithMatchReturn() throws Exception { + String fieldName = RandomDocumentPicks.randomFieldName(random()); + IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + doc.setFieldValue(fieldName, "2"); + Map patternBank = new HashMap<>(); + patternBank.put("ONE", "1"); + patternBank.put("TWO", "2"); + patternBank.put("THREE", "3"); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank, + Arrays.asList("%{ONE:one}", "%{TWO:two}", "%{THREE:three}"), fieldName); + processor.execute(doc); + assertThat(doc.hasField("one"), equalTo(false)); + assertThat(doc.getFieldValue("two", String.class), equalTo("2")); + assertThat(doc.hasField("three"), equalTo(false)); + } + + public void testSetMetadata() throws Exception { + String fieldName 
= RandomDocumentPicks.randomFieldName(random()); + IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + doc.setFieldValue(fieldName, "abc23"); + Map patternBank = new HashMap<>(); + patternBank.put("ONE", "1"); + patternBank.put("TWO", "2"); + patternBank.put("THREE", "3"); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank, + Arrays.asList("%{ONE:one}", "%{TWO:two}", "%{THREE:three}"), fieldName, true); + processor.execute(doc); + assertThat(doc.hasField("one"), equalTo(false)); + assertThat(doc.getFieldValue("two", String.class), equalTo("2")); + assertThat(doc.hasField("three"), equalTo(false)); + assertThat(doc.getFieldValue("_ingest._grok_match_index", String.class), equalTo("1")); + } + + public void testCombinedPatterns() { + String combined; + combined = GrokProcessor.combinePatterns(Arrays.asList(""), false); + assertThat(combined, equalTo("")); + combined = GrokProcessor.combinePatterns(Arrays.asList(""), true); + assertThat(combined, equalTo("")); + combined = GrokProcessor.combinePatterns(Arrays.asList("foo"), false); + assertThat(combined, equalTo("foo")); + combined = GrokProcessor.combinePatterns(Arrays.asList("foo"), true); + assertThat(combined, equalTo("foo")); + combined = GrokProcessor.combinePatterns(Arrays.asList("foo", "bar"), false); + assertThat(combined, equalTo("(?:foo)|(?:bar)")); + combined = GrokProcessor.combinePatterns(Arrays.asList("foo", "bar"), true); + assertThat(combined, equalTo("(?<_ingest._grok_match_index.0>foo)|(?<_ingest._grok_match_index.1>bar)")); } } diff --git a/modules/ingest-grok/src/test/resources/rest-api-spec/test/ingest_grok/20_grok.yaml b/modules/ingest-grok/src/test/resources/rest-api-spec/test/ingest_grok/20_grok.yaml index f88136d8a79..a51f5a102b0 100644 --- a/modules/ingest-grok/src/test/resources/rest-api-spec/test/ingest_grok/20_grok.yaml +++ b/modules/ingest-grok/src/test/resources/rest-api-spec/test/ingest_grok/20_grok.yaml @@ -10,7 
+10,7 @@ { "grok" : { "field" : "field1", - "pattern" : "%{NUMBER:val:float} %{NUMBER:status:int} <%{WORD:msg}>" + "patterns" : ["%{NUMBER:val:float} %{NUMBER:status:int} <%{WORD:msg}>"] } } ] @@ -46,7 +46,7 @@ { "grok" : { "field" : "field1", - "pattern" : "<%{MY_PATTERN:msg}>", + "patterns" : ["<%{MY_PATTERN:msg}>"], "pattern_definitions" : { "MY_PATTERN" : "foo" } @@ -83,7 +83,7 @@ { "grok" : { "field" : "field1", - "pattern" : "<%{NUMBER:msg}>", + "patterns" : ["<%{NUMBER:msg}>"], "pattern_definitions" : { "NUMBER" : "foo" } @@ -107,3 +107,43 @@ type: test id: 1 - match: { _source.msg: "foo" } + +--- +"Test simulate with grok debugging enabled": + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "grok" : { + "field" : "field", + "patterns" : ["%{ONE:one}", "%{TWO:two}"], + "pattern_definitions" : { + "ONE" : "1", + "TWO" : "2" + }, + "trace_match" : true + } + } + ] + }, + "docs": [ + { + "_index": "index", + "_type": "type", + "_id": "id", + "_source": { + "field": "abc2xyz" + } + } + ] + } + - length: { docs: 1 } + - match: { docs.0.doc._source.field: "abc2xyz" } + - match: { docs.0.doc._source.two: "2" } + - length: { docs.0.doc._ingest: 2 } + - match: { docs.0.doc._ingest._grok_match_index: "1" } + - is_true: docs.0.doc._ingest.timestamp diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/test/resources/rest-api-spec/test/ingest/20_combine_processors.yaml b/qa/smoke-test-ingest-with-all-dependencies/src/test/resources/rest-api-spec/test/ingest/20_combine_processors.yaml index 8c09ffb3101..45bda3146a6 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/test/resources/rest-api-spec/test/ingest/20_combine_processors.yaml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/test/resources/rest-api-spec/test/ingest/20_combine_processors.yaml @@ -9,7 +9,7 @@ { "grok" : { "field" : "log", - "pattern": "%{COMBINEDAPACHELOG}" + "patterns": ["%{COMBINEDAPACHELOG}"] } }, { @@ -55,7 +55,7 @@ 
index: test type: test id: 1 - - length: { _source: 14 } + - length: { _source: 13 } - match: { _source.request: "/presentations/logstash-scale11x/images/ahhh___rage_face_by_samusmmx-d5g5zap.png" } - match: { _source.agent: "\"Mozilla/5.0 (Linux; Android 4.2.2; VS980 4G Build/JDQ39B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.135 Mobile Safari/537.36\"" } - match: { _source.auth: "-" }