From f3721fa88ced468ef1ee796d3289a9f1289d6d60 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Tue, 5 May 2020 16:52:56 -0500 Subject: [PATCH] [7.x] Prevent stack overflow for numerous grok patterns. (#55899) (#56065) This was noticed for a pipeline that was defining hundreds of grok patterns inline with a single grok processor. The recursive call used to translate a Grok pattern to a regular expression can overflow the stack. This commit converts that method to an iterative method. Co-authored-by: Przemko Robakowski --- .../java/org/elasticsearch/grok/Grok.java | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index 7875f736f76..c048471d542 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -62,6 +62,7 @@ public final class Grok { GROK_PATTERN.getBytes(StandardCharsets.UTF_8).length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.DEFAULT); private static final Map builtinPatterns; + private static final int MAX_TO_REGEX_ITERATIONS = 100_000; //sanity limit static { try { @@ -167,17 +168,23 @@ public final class Grok { * @return named regex expression */ public String toRegex(String grokPattern) { - byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8); - Matcher matcher = GROK_PATTERN_REGEX.matcher(grokPatternBytes); + StringBuilder res = new StringBuilder(); + for (int i = 0; i < MAX_TO_REGEX_ITERATIONS; i++) { + byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8); + Matcher matcher = GROK_PATTERN_REGEX.matcher(grokPatternBytes); + + int result; + try { + matcherWatchdog.register(matcher); + result = matcher.search(0, grokPatternBytes.length, Option.NONE); + } finally { + matcherWatchdog.unregister(matcher); + } + + if (result < 0) { + return res.append(grokPattern).toString(); + } - int result; - try { - matcherWatchdog.register(matcher); - result = matcher.search(0, grokPatternBytes.length, Option.NONE); - } finally { - matcherWatchdog.unregister(matcher); - } - if (result >= 0) { Region region = matcher.getEagerRegion(); String namedPatternRef = groupMatch(NAME_GROUP, region, grokPattern); String subName = groupMatch(SUBNAME_GROUP, region, grokPattern); @@ -185,7 +192,6 @@ public final class Grok { @SuppressWarnings("unused") String definition = groupMatch(DEFINITION_GROUP, region, grokPattern); String patternName = groupMatch(PATTERN_GROUP, region, grokPattern); - String pattern = patternBank.get(patternName); if (pattern == null) { throw new IllegalArgumentException("Unable to find pattern [" + patternName + "] in Grok's pattern dictionary"); @@ -193,22 +199,20 @@ public final class Grok { if (pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":")) { throw new IllegalArgumentException("circular reference in pattern back [" + patternName + "]"); } - String grokPart; if (namedCaptures && subName != null) { grokPart = String.format(Locale.US, "(?<%s>%s)", namedPatternRef, pattern); - } else if (!namedCaptures) { - grokPart = String.format(Locale.US, "(?<%s>%s)", patternName + "_" + String.valueOf(result), pattern); - } else { + } else if (namedCaptures) { grokPart = String.format(Locale.US, "(?:%s)", pattern); + } else { + grokPart = String.format(Locale.US, "(?<%s>%s)", patternName + "_" + result, pattern); } - String start = new String(grokPatternBytes, 0, result, StandardCharsets.UTF_8); String rest = new String(grokPatternBytes, region.end[0], grokPatternBytes.length - region.end[0], StandardCharsets.UTF_8); - return start + toRegex(grokPart + rest); + grokPattern = grokPart + rest; + res.append(start); } - - return grokPattern; + throw new IllegalArgumentException("Can not convert grok patterns to regular expression"); } /**