From f56097b57a4d9d6fb5e7f1d988fb84e36ee2cf9f Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Tue, 13 Dec 2016 13:19:55 -0800 Subject: [PATCH] Fixes GrokProcessor's ignorance of named-captures with same name. (#22131) Grok was originally ignoring potential matches to named-capture groups larger than one. For example, If you had two patterns containing the same named field, but only the second pattern matched, it would fail to pick this up. This PR fixes this by exploring all potential places where a named-capture was used and chooses the first one that matched. Fixes #22117. --- .../org/elasticsearch/ingest/common/Grok.java | 29 +++++++++---- .../ingest/common/GrokProcessor.java | 31 ++++++-------- .../ingest/common/GrokProcessorTests.java | 41 +++++++++++++++++++ .../ingest/common/GrokTests.java | 10 +++++ 4 files changed, 86 insertions(+), 25 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Grok.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Grok.java index b6e1877d3e0..7120643f47c 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Grok.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Grok.java @@ -29,6 +29,7 @@ import org.joni.Syntax; import org.joni.exception.ValueException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Locale; @@ -125,12 +126,25 @@ final class Grok { return grokPattern; } + /** + * Checks whether a specific text matches the defined grok expression. + * + * @param text the string to match + * @return true if grok expression matches text, false otherwise. + */ public boolean match(String text) { Matcher matcher = compiledExpression.matcher(text.getBytes(StandardCharsets.UTF_8)); int result = matcher.search(0, text.length(), Option.DEFAULT); return (result != -1); } + /** + * Matches and returns any named captures within a compiled grok expression that matched + * within the provided text. + * + * @param text the text to match and extract values from. + * @return a map containing field names and their respective coerced values that matched. + */ public Map captures(String text) { byte[] textAsBytes = text.getBytes(StandardCharsets.UTF_8); Map fields = new HashMap<>(); @@ -140,16 +154,17 @@ final class Grok { Region region = matcher.getEagerRegion(); for (Iterator entry = compiledExpression.namedBackrefIterator(); entry.hasNext();) { NameEntry e = entry.next(); - int number = e.getBackRefs()[0]; - String groupName = new String(e.name, e.nameP, e.nameEnd - e.nameP, StandardCharsets.UTF_8); - String matchValue = null; - if (region.beg[number] >= 0) { - matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number], + for (int number : e.getBackRefs()) { + if (region.beg[number] >= 0) { + String matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number], StandardCharsets.UTF_8); + GrokMatchGroup match = new GrokMatchGroup(groupName, matchValue); + fields.put(match.getName(), match.getValue()); + break; + } } - GrokMatchGroup match = new GrokMatchGroup(groupName, matchValue); - fields.put(match.getName(), match.getValue()); + } return fields; } else if (result != -1) { diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java index f271da198f8..f8eb49b9239 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java @@ -68,7 +68,6 @@ public final class GrokProcessor extends AbstractProcessor { } matches.entrySet().stream() - .filter((e) -> Objects.nonNull(e.getValue())) .forEach((e) -> ingestDocument.setFieldValue(e.getKey(), e.getValue())); if (traceMatch) { @@ -108,24 +107,20 @@ public final class GrokProcessor extends AbstractProcessor { static String combinePatterns(List patterns, boolean traceMatch) { String combinedPattern; if (patterns.size() > 1) { - if (traceMatch) { - combinedPattern = ""; - for (int i = 0; i < patterns.size(); i++) { - String valueWrap = "(?<" + PATTERN_MATCH_KEY + "." + i + ">" + patterns.get(i) + ")"; - if (combinedPattern.equals("")) { - combinedPattern = valueWrap; - } else { - combinedPattern = combinedPattern + "|" + valueWrap; - } + combinedPattern = ""; + for (int i = 0; i < patterns.size(); i++) { + String pattern = patterns.get(i); + String valueWrap; + if (traceMatch) { + valueWrap = "(?<" + PATTERN_MATCH_KEY + "." + i + ">" + pattern + ")"; + } else { + valueWrap = "(?:" + patterns.get(i) + ")"; + } + if (combinedPattern.equals("")) { + combinedPattern = valueWrap; + } else { + combinedPattern = combinedPattern + "|" + valueWrap; } - } else { - combinedPattern = patterns.stream().reduce("", (prefix, value) -> { - if (prefix.equals("")) { - return "(?:" + value + ")"; - } else { - return prefix + "|" + "(?:" + value + ")"; - } - }); } } else { combinedPattern = patterns.get(0); diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java index 25cdb91387d..24d775db682 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokProcessorTests.java @@ -186,4 +186,45 @@ public class GrokProcessorTests extends ESTestCase { combined = GrokProcessor.combinePatterns(Arrays.asList("foo", "bar"), true); assertThat(combined, equalTo("(?<_ingest._grok_match_index.0>foo)|(?<_ingest._grok_match_index.1>bar)")); } + + public void testCombineSamePatternNameAcrossPatterns() throws Exception { + String fieldName = RandomDocumentPicks.randomFieldName(random()); + IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + doc.setFieldValue(fieldName, "1-3"); + Map patternBank = new HashMap<>(); + patternBank.put("ONE", "1"); + patternBank.put("TWO", "2"); + patternBank.put("THREE", "3"); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank, + Arrays.asList("%{ONE:first}-%{TWO:second}", "%{ONE:first}-%{THREE:second}"), fieldName, randomBoolean(), randomBoolean()); + processor.execute(doc); + assertThat(doc.getFieldValue("first", String.class), equalTo("1")); + assertThat(doc.getFieldValue("second", String.class), equalTo("3")); + } + + public void testFirstWinNamedCapture() throws Exception { + String fieldName = RandomDocumentPicks.randomFieldName(random()); + IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + doc.setFieldValue(fieldName, "12"); + Map patternBank = new HashMap<>(); + patternBank.put("ONETWO", "1|2"); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank, + Collections.singletonList("%{ONETWO:first}%{ONETWO:first}"), fieldName, randomBoolean(), randomBoolean()); + processor.execute(doc); + assertThat(doc.getFieldValue("first", String.class), equalTo("1")); + } + + public void testUnmatchedNamesNotIncludedInDocument() throws Exception { + String fieldName = RandomDocumentPicks.randomFieldName(random()); + IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + doc.setFieldValue(fieldName, "3"); + Map patternBank = new HashMap<>(); + patternBank.put("ONETWO", "1|2"); + patternBank.put("THREE", "3"); + GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank, + Collections.singletonList("%{ONETWO:first}|%{THREE:second}"), fieldName, randomBoolean(), randomBoolean()); + processor.execute(doc); + assertFalse(doc.hasField("first")); + assertThat(doc.getFieldValue("second", String.class), equalTo("3")); + } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokTests.java index e8b88ea0d12..8874853dcd1 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/GrokTests.java @@ -307,4 +307,14 @@ public class GrokTests extends ESTestCase { Grok grok = new Grok(bank, "%{MONTHDAY:greatday}"); assertThat(grok.captures("nomatch"), nullValue()); } + + public void testMultipleNamedCapturesWithSameName() { + Map bank = new HashMap<>(); + bank.put("SINGLEDIGIT", "[0-9]"); + Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}"); + + Map expected = new HashMap<>(); + expected.put("num", "1"); + assertThat(grok.captures("12"), equalTo(expected)); + } }