fix trace_match behavior for when there is only one grok pattern (#21413)
There is an issue in the Grok Processor where `trace_match: true` does not inject `_ingest._grok_match_index` into the ingest document when only one pattern is provided. This is due to an optimization in the regex construction. This commit adds a check for that case and injects a static index value of "0", since the only pattern that can match is the one at the first index of the patterns list. To make this clearer, more documentation was added to the grok-processor docs. Fixes #21371.
This commit is contained in:
parent
6baded8e7f
commit
04b712bdc5
|
@ -931,14 +931,14 @@ and the result:
|
||||||
"date1" : "2016-04-25T12:02:01.789Z"
|
"date1" : "2016-04-25T12:02:01.789Z"
|
||||||
},
|
},
|
||||||
"_ingest" : {
|
"_ingest" : {
|
||||||
"timestamp" : "2016-08-11T12:00:01.222Z"
|
"timestamp" : "2016-11-08T19:43:03.850+0000"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// TESTRESPONSE[s/2016-08-11T12:00:01.222Z/$body.docs.0.doc._ingest.timestamp/]
|
// TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
|
||||||
|
|
||||||
The above example shows that `_index` was set to `<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>`. Elasticsearch
|
The above example shows that `_index` was set to `<myindex-{2016-04-25||/M{yyyy-MM-dd|UTC}}>`. Elasticsearch
|
||||||
understands this to mean `2016-04-01` as is explained in the <<date-math-index-names, date math index name documentation>>
|
understands this to mean `2016-04-01` as is explained in the <<date-math-index-names, date math index name documentation>>
|
||||||
|
@ -1278,6 +1278,139 @@ Here is an example of a pipeline specifying custom pattern definitions:
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
[[trace-match]]
|
||||||
|
==== Providing Multiple Match Patterns
|
||||||
|
|
||||||
|
Sometimes one pattern is not enough to capture the potential structure of a field. Let's assume we
|
||||||
|
want to match all messages that mention one of our favorite pet breeds, whether cat or dog. One way to accomplish
|
||||||
|
this is to provide two distinct patterns that can be matched, instead of one really complicated expression capturing
|
||||||
|
the same `or` behavior.
|
||||||
|
|
||||||
|
Here is an example of such a configuration executed against the simulate API:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
POST _ingest/pipeline/_simulate
|
||||||
|
{
|
||||||
|
"pipeline": {
|
||||||
|
"description" : "parse multiple patterns",
|
||||||
|
"processors": [
|
||||||
|
{
|
||||||
|
"grok": {
|
||||||
|
"field": "message",
|
||||||
|
"patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
|
||||||
|
"pattern_definitions" : {
|
||||||
|
"FAVORITE_DOG" : "beagle",
|
||||||
|
"FAVORITE_CAT" : "burmese"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"docs":[
|
||||||
|
{
|
||||||
|
"_source": {
|
||||||
|
"message": "I love burmese cats!"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
|
||||||
|
response:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"doc": {
|
||||||
|
"_type": "_type",
|
||||||
|
"_index": "_index",
|
||||||
|
"_id": "_id",
|
||||||
|
"_source": {
|
||||||
|
"message": "I love burmese cats!",
|
||||||
|
"pet": "burmese"
|
||||||
|
},
|
||||||
|
"_ingest": {
|
||||||
|
"timestamp": "2016-11-08T19:43:03.850+0000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
|
||||||
|
|
||||||
|
Both patterns will set the field `pet` with the appropriate match, but what if we want to trace which of our
|
||||||
|
patterns matched and populated our fields? We can do this with the `trace_match` parameter. Here is the output of
|
||||||
|
that same pipeline, but with `"trace_match": true` configured:
|
||||||
|
|
||||||
|
////
|
||||||
|
Hidden setup for example:
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
POST _ingest/pipeline/_simulate
|
||||||
|
{
|
||||||
|
"pipeline": {
|
||||||
|
"description" : "parse multiple patterns",
|
||||||
|
"processors": [
|
||||||
|
{
|
||||||
|
"grok": {
|
||||||
|
"field": "message",
|
||||||
|
"patterns": ["%{FAVORITE_DOG:pet}", "%{FAVORITE_CAT:pet}"],
|
||||||
|
"trace_match": true,
|
||||||
|
"pattern_definitions" : {
|
||||||
|
"FAVORITE_DOG" : "beagle",
|
||||||
|
"FAVORITE_CAT" : "burmese"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"docs":[
|
||||||
|
{
|
||||||
|
"_source": {
|
||||||
|
"message": "I love burmese cats!"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// CONSOLE
|
||||||
|
////
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
"docs": [
|
||||||
|
{
|
||||||
|
"doc": {
|
||||||
|
"_type": "_type",
|
||||||
|
"_index": "_index",
|
||||||
|
"_id": "_id",
|
||||||
|
"_source": {
|
||||||
|
"message": "I love burmese cats!",
|
||||||
|
"pet": "burmese"
|
||||||
|
},
|
||||||
|
"_ingest": {
|
||||||
|
"_grok_match_index": "1",
|
||||||
|
"timestamp": "2016-11-08T19:43:03.850+0000"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// TESTRESPONSE[s/2016-11-08T19:43:03.850\+0000/$body.docs.0.doc._ingest.timestamp/]
|
||||||
|
|
||||||
|
In the above response, you can see that the index of the pattern that matched was `"1"`. This is to say that it was the
|
||||||
|
second pattern in `patterns` (indexes start at zero) that matched.
|
||||||
|
|
||||||
|
This trace metadata enables debugging which of the patterns matched. This information is stored in the ingest
|
||||||
|
metadata and will not be indexed.
|
||||||
|
|
||||||
[[gsub-processor]]
|
[[gsub-processor]]
|
||||||
=== Gsub Processor
|
=== Gsub Processor
|
||||||
Converts a string field by applying a regular expression and a replacement.
|
Converts a string field by applying a regular expression and a replacement.
|
||||||
|
|
|
@ -37,6 +37,7 @@ public final class GrokProcessor extends AbstractProcessor {
|
||||||
private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
|
private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
|
||||||
|
|
||||||
private final String matchField;
|
private final String matchField;
|
||||||
|
private final List<String> matchPatterns;
|
||||||
private final Grok grok;
|
private final Grok grok;
|
||||||
private final boolean traceMatch;
|
private final boolean traceMatch;
|
||||||
private final boolean ignoreMissing;
|
private final boolean ignoreMissing;
|
||||||
|
@ -45,6 +46,7 @@ public final class GrokProcessor extends AbstractProcessor {
|
||||||
boolean traceMatch, boolean ignoreMissing) {
|
boolean traceMatch, boolean ignoreMissing) {
|
||||||
super(tag);
|
super(tag);
|
||||||
this.matchField = matchField;
|
this.matchField = matchField;
|
||||||
|
this.matchPatterns = matchPatterns;
|
||||||
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch));
|
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch));
|
||||||
this.traceMatch = traceMatch;
|
this.traceMatch = traceMatch;
|
||||||
this.ignoreMissing = ignoreMissing;
|
this.ignoreMissing = ignoreMissing;
|
||||||
|
@ -79,11 +81,15 @@ public final class GrokProcessor extends AbstractProcessor {
|
||||||
.forEach((e) -> ingestDocument.setFieldValue(e.getKey(), e.getValue()));
|
.forEach((e) -> ingestDocument.setFieldValue(e.getKey(), e.getValue()));
|
||||||
|
|
||||||
if (traceMatch) {
|
if (traceMatch) {
|
||||||
@SuppressWarnings("unchecked")
|
if (matchPatterns.size() > 1) {
|
||||||
HashMap<String, String> matchMap = (HashMap<String, String>) ingestDocument.getFieldValue(PATTERN_MATCH_KEY, Object.class);
|
@SuppressWarnings("unchecked")
|
||||||
matchMap.keySet().stream().findFirst().ifPresent((index) -> {
|
HashMap<String, String> matchMap = (HashMap<String, String>) ingestDocument.getFieldValue(PATTERN_MATCH_KEY, Object.class);
|
||||||
ingestDocument.setFieldValue(PATTERN_MATCH_KEY, index);
|
matchMap.keySet().stream().findFirst().ifPresent((index) -> {
|
||||||
});
|
ingestDocument.setFieldValue(PATTERN_MATCH_KEY, index);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
ingestDocument.setFieldValue(PATTERN_MATCH_KEY, "0");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,6 +110,10 @@ public final class GrokProcessor extends AbstractProcessor {
|
||||||
return matchField;
|
return matchField;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
List<String> getMatchPatterns() {
|
||||||
|
return matchPatterns;
|
||||||
|
}
|
||||||
|
|
||||||
static String combinePatterns(List<String> patterns, boolean traceMatch) {
|
static String combinePatterns(List<String> patterns, boolean traceMatch) {
|
||||||
String combinedPattern;
|
String combinedPattern;
|
||||||
if (patterns.size() > 1) {
|
if (patterns.size() > 1) {
|
||||||
|
|
|
@ -158,6 +158,19 @@ public class GrokProcessorTests extends ESTestCase {
|
||||||
assertThat(doc.getFieldValue("_ingest._grok_match_index", String.class), equalTo("1"));
|
assertThat(doc.getFieldValue("_ingest._grok_match_index", String.class), equalTo("1"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTraceWithOnePattern() throws Exception {
|
||||||
|
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||||
|
IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||||
|
doc.setFieldValue(fieldName, "first1");
|
||||||
|
Map<String, String> patternBank = new HashMap<>();
|
||||||
|
patternBank.put("ONE", "1");
|
||||||
|
GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank,
|
||||||
|
Arrays.asList("%{ONE:one}"), fieldName, true, false);
|
||||||
|
processor.execute(doc);
|
||||||
|
assertThat(doc.hasField("one"), equalTo(true));
|
||||||
|
assertThat(doc.getFieldValue("_ingest._grok_match_index", String.class), equalTo("0"));
|
||||||
|
}
|
||||||
|
|
||||||
public void testCombinedPatterns() {
|
public void testCombinedPatterns() {
|
||||||
String combined;
|
String combined;
|
||||||
combined = GrokProcessor.combinePatterns(Arrays.asList(""), false);
|
combined = GrokProcessor.combinePatterns(Arrays.asList(""), false);
|
||||||
|
|
Loading…
Reference in New Issue