diff --git a/docs/reference/ingest/processors/grok.asciidoc b/docs/reference/ingest/processors/grok.asciidoc index 7593e2b219e..c8b1e927107 100644 --- a/docs/reference/ingest/processors/grok.asciidoc +++ b/docs/reference/ingest/processors/grok.asciidoc @@ -324,3 +324,23 @@ settings: | `ingest.grok.watchdog.interval` | 1s | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time. | `ingest.grok.watchdog.max_execution_time` | 1s | The maximum allowed execution of a grok expression evaluation. |====== + +[[grok-debugging]] +==== Grok debugging + +It is advised to use the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] to debug grok patterns. From there you can test one or more +patterns in the UI against sample data. Under the covers it uses the same engine as ingest node processor. + +Additionally, it is recommended to enable debug logging for Grok so that any additional messages may also be seen in the Elasticsearch +server log. + +[source,js] +-------------------------------------------------- +PUT _cluster/settings +{ + "transient": { + "logger.org.elasticsearch.ingest.common.GrokProcessor": "debug" + } +} +-------------------------------------------------- +// NOTCONSOLE diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index c048471d542..5c0af365be4 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -41,6 +41,7 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Consumer; public final class Grok { @@ -77,19 +78,20 @@ public final class Grok { private final Regex compiledExpression; private final MatcherWatchdog matcherWatchdog; - public Grok(Map patternBank, String grokPattern) { - this(patternBank, grokPattern, true, MatcherWatchdog.noop()); + public Grok(Map patternBank, String grokPattern, Consumer logCallBack) { + this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack); } - public Grok(Map patternBank, String grokPattern, MatcherWatchdog matcherWatchdog) { - this(patternBank, grokPattern, true, matcherWatchdog); + public Grok(Map patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer logCallBack) { + this(patternBank, grokPattern, true, matcherWatchdog, logCallBack); } - Grok(Map patternBank, String grokPattern, boolean namedCaptures) { - this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop()); + Grok(Map patternBank, String grokPattern, boolean namedCaptures, Consumer logCallBack) { + this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack); } - private Grok(Map patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog) { + private Grok(Map patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog, + Consumer logCallBack) { this.patternBank = patternBank; this.namedCaptures = namedCaptures; this.matcherWatchdog = matcherWatchdog; @@ -102,7 +104,8 @@ public final class Grok { String expression = toRegex(grokPattern); byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8); - this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE); + this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE, + message -> logCallBack.accept(message)); } /** diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index f25d502b6ef..2f90c245ee3 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -28,8 +28,10 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -40,19 +42,19 @@ public class GrokTests extends ESTestCase { public void testMatchWithoutCaptures() { String line = "value"; - Grok grok = new Grok(basePatterns, "value"); + Grok grok = new Grok(basePatterns, "value", logger::warn); Map matches = grok.captures(line); assertEquals(0, matches.size()); } public void testNoMatchingPatternInDictionary() { - Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}")); + Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn)); assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary")); } public void testSimpleSyslogLine() { String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"; - Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}"); + Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn); Map matches = grok.captures(line); assertEquals("evita", matches.get("logsource")); assertEquals("Mar 16 00:01:25", matches.get("timestamp")); @@ -64,7 +66,7 @@ public class GrokTests extends ESTestCase { public void testSyslog5424Line() { String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " + "Hello, syslog."; - Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}"); + Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn); Map matches = grok.captures(line); assertEquals("191", matches.get("syslog5424_pri")); assertEquals("1", matches.get("syslog5424_ver")); @@ -79,13 +81,13 @@ public class GrokTests extends ESTestCase { public void testDatePattern() { String line = "fancy 12-12-12 12:12:12"; - Grok grok = new Grok(basePatterns, "(?%{DATE_EU} %{TIME})"); + Grok grok = new Grok(basePatterns, "(?%{DATE_EU} %{TIME})", logger::warn); Map matches = grok.captures(line); assertEquals("12-12-12 12:12:12", matches.get("timestamp")); } public void testNilCoercedValues() { - Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)"); + Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn); Map matches = grok.captures("test 28.4ms"); assertEquals(28.4f, matches.get("duration")); matches = grok.captures("test N/A"); @@ -93,7 +95,7 @@ public class GrokTests extends ESTestCase { } public void testNilWithNoCoercion() { - Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)"); + Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn); Map matches = grok.captures("test 28.4ms"); assertEquals("28.4", matches.get("duration")); matches = grok.captures("test N/A"); @@ -103,7 +105,7 @@ public class GrokTests extends ESTestCase { public void testUnicodeSyslog() { Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " + "%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " + - "%{GREEDYDATA:syslog_message}"); + "%{GREEDYDATA:syslog_message}", logger::warn); Map matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " + "SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: " + "email@domain.no"); @@ -113,19 +115,19 @@ public class GrokTests extends ESTestCase { } public void testNamedFieldsWithWholeTextMatch() { - Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}"); + Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn); Map matches = grok.captures("11/01/01"); assertThat(matches.get("stimestamp"), equalTo("11/01/01")); } public void testWithOniguramaNamedCaptures() { - Grok grok = new Grok(basePatterns, "(?\\w+)"); + Grok grok = new Grok(basePatterns, "(?\\w+)", logger::warn); Map matches = grok.captures("hello world"); assertThat(matches.get("foo"), equalTo("hello")); } public void testISO8601() { - Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$"); + Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn); List timeMessages = Arrays.asList( "2001-01-01T00:00:00", "1974-03-02T04:09:09", @@ -149,7 +151,7 @@ public class GrokTests extends ESTestCase { } public void testNotISO8601() { - Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$"); + Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn); List timeMessages = Arrays.asList( "2001-13-01T00:00:00", // invalid month "2001-00-01T00:00:00", // invalid month @@ -188,7 +190,7 @@ public class GrokTests extends ESTestCase { String text = "wowza !!!Tal!!! - Tal"; String pattern = "%{EXCITED_NAME} - %{NAME}"; - Grok g = new Grok(bank, pattern, false); + Grok g = new Grok(bank, pattern, false, logger::warn); assertEquals("(?!!!(?Tal)!!!) - (?Tal)", g.toRegex(pattern)); assertEquals(true, g.match(text)); @@ -206,7 +208,7 @@ public class GrokTests extends ESTestCase { Map bank = new HashMap<>(); bank.put("NAME", "!!!%{NAME}!!!"); String pattern = "%{NAME}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn); }); assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage()); @@ -214,7 +216,7 @@ public class GrokTests extends ESTestCase { Map bank = new HashMap<>(); bank.put("NAME", "!!!%{NAME:name}!!!"); String pattern = "%{NAME}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn); }); assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage()); @@ -222,7 +224,7 @@ public class GrokTests extends ESTestCase { Map bank = new HashMap<>(); bank.put("NAME", "!!!%{NAME:name:int}!!!"); String pattern = "%{NAME}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn); }); assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage()); @@ -231,7 +233,7 @@ public class GrokTests extends ESTestCase { bank.put("NAME1", "!!!%{NAME2}!!!"); bank.put("NAME2", "!!!%{NAME1}!!!"); String pattern = "%{NAME1}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn); }); assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage()); @@ -241,7 +243,7 @@ public class GrokTests extends ESTestCase { bank.put("NAME2", "!!!%{NAME3}!!!"); bank.put("NAME3", "!!!%{NAME1}!!!"); String pattern = "%{NAME1}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn); }); assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", e.getMessage()); @@ -254,7 +256,7 @@ public class GrokTests extends ESTestCase { bank.put("NAME4", "!!!%{NAME5}!!!"); bank.put("NAME5", "!!!%{NAME1}!!!"); String pattern = "%{NAME1}"; - new Grok(bank, pattern, false); + new Grok(bank, pattern, false, logger::warn ); }); assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME2=>NAME3=>NAME4]", e.getMessage()); @@ -262,7 +264,7 @@ public class GrokTests extends ESTestCase { public void testBooleanCaptures() { String pattern = "%{WORD:name}=%{WORD:status:boolean}"; - Grok g = new Grok(basePatterns, pattern); + Grok g = new Grok(basePatterns, pattern, logger::warn); String text = "active=true"; Map expected = new HashMap<>(); @@ -279,7 +281,7 @@ public class GrokTests extends ESTestCase { bank.put("NUMBER", "(?:%{BASE10NUM})"); String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}"; - Grok g = new Grok(bank, pattern); + Grok g = new Grok(bank, pattern, logger::warn); String text = "12009.34 20000000000 4820.092"; Map expected = new HashMap<>(); @@ -297,7 +299,7 @@ public class GrokTests extends ESTestCase { bank.put("NUMBER", "(?:%{BASE10NUM})"); String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}"; - Grok g = new Grok(bank, pattern); + Grok g = new Grok(bank, pattern, logger::warn); String text = "12009.34 200 9032"; Map expected = new HashMap<>(); @@ -312,7 +314,7 @@ public class GrokTests extends ESTestCase { String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " + "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\""; - Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}"); + Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn); Map matches = grok.captures(logLine); assertEquals("31.184.238.164", matches.get("clientip")); @@ -372,7 +374,7 @@ public class GrokTests extends ESTestCase { String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " + "HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}"; - Grok grok = new Grok(bank, pattern); + Grok grok = new Grok(bank, pattern, logger::warn); Map expected = new HashMap<>(); expected.put("clientip", "83.149.9.216"); @@ -396,14 +398,14 @@ public class GrokTests extends ESTestCase { public void testNoMatch() { Map bank = new HashMap<>(); bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])"); - Grok grok = new Grok(bank, "%{MONTHDAY:greatday}"); + Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn); assertThat(grok.captures("nomatch"), nullValue()); } public void testMultipleNamedCapturesWithSameName() { Map bank = new HashMap<>(); bank.put("SINGLEDIGIT", "[0-9]"); - Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}"); + Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn); Map expected = new HashMap<>(); expected.put("num", "1"); @@ -430,7 +432,8 @@ public class GrokTests extends ESTestCase { }); t.start(); }; - Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler)); + Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler), + logger::warn); Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine)); run.set(false); assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms")); @@ -470,24 +473,32 @@ public class GrokTests extends ESTestCase { } public void testUnsupportedBracketsInFieldName() { - Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}"); + Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn); Map matches = grok.captures("line"); assertNull(matches); } public void testJavaClassPatternWithUnderscore() { - Grok grok = new Grok(basePatterns, "%{JAVACLASS}"); + Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn); assertThat(grok.match("Test_Class.class"), is(true)); } public void testJavaFilePatternWithSpaces() { - Grok grok = new Grok(basePatterns, "%{JAVAFILE}"); + Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn); assertThat(grok.match("Test Class.java"), is(true)); } + public void testLogCallBack(){ + AtomicReference message = new AtomicReference<>(); + Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set); + grok.match("[foo]"); + //this message comes from Joni, so updates to Joni may change the expectation + assertThat(message.get(), containsString("regular expression has redundant nested repeat operator")); + } + private void assertGrokedField(String fieldName) { String line = "foo"; - Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}"); + Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn); Map matches = grok.captures(line); assertEquals(line, matches.get(fieldName)); } diff --git a/modules/ingest-common/build.gradle b/modules/ingest-common/build.gradle index 605096e26cd..e22742e3a9a 100644 --- a/modules/ingest-common/build.gradle +++ b/modules/ingest-common/build.gradle @@ -40,3 +40,16 @@ testClusters.integTest { // (this is because the integTest node is not using default distribution, but only the minimal number of required modules) module project(':modules:lang-mustache').tasks.bundlePlugin.archiveFile } + +thirdPartyAudit.ignoreMissingClasses( + // from log4j + 'org.osgi.framework.AdaptPermission', + 'org.osgi.framework.AdminPermission', + 'org.osgi.framework.Bundle', + 'org.osgi.framework.BundleActivator', + 'org.osgi.framework.BundleContext', + 'org.osgi.framework.BundleEvent', + 'org.osgi.framework.SynchronousBundleListener', + 'org.osgi.framework.wiring.BundleWire', + 'org.osgi.framework.wiring.BundleWiring' +) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java index c603da766a6..ef587d1761d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/GrokProcessor.java @@ -19,6 +19,8 @@ package org.elasticsearch.ingest.common; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.grok.Grok; import org.elasticsearch.grok.MatcherWatchdog; import org.elasticsearch.ingest.AbstractProcessor; @@ -36,6 +38,7 @@ public final class GrokProcessor extends AbstractProcessor { public static final String TYPE = "grok"; private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index"; + private static final Logger logger = LogManager.getLogger(GrokProcessor.class); private final String matchField; private final List matchPatterns; @@ -48,9 +51,12 @@ public final class GrokProcessor extends AbstractProcessor { super(tag); this.matchField = matchField; this.matchPatterns = matchPatterns; - this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog); + this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug); this.traceMatch = traceMatch; this.ignoreMissing = ignoreMissing; + // Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose + // so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation. + new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___"); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java index b3e0a14e377..e4623071cc8 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java @@ -5,6 +5,8 @@ */ package org.elasticsearch.xpack.ml.filestructurefinder; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.grok.Grok; @@ -27,6 +29,7 @@ import java.util.stream.Stream; public final class FileStructureUtils { + private static final Logger logger = LogManager.getLogger(FileStructureUtils.class); public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp"; public static final String MAPPING_TYPE_SETTING = "type"; public static final String MAPPING_FORMAT_SETTING = "format"; @@ -61,10 +64,11 @@ public final class FileStructureUtils { private static final int NUM_TOP_HITS = 10; // NUMBER Grok pattern doesn't support scientific notation, so we extend it private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$", - TimeoutChecker.watchdog); - private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog); - private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog); - private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog); + TimeoutChecker.watchdog, logger::warn); + private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog, logger::warn); + private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog, logger::warn); + private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog, + logger::warn); private static final int KEYWORD_MAX_LEN = 256; private static final int KEYWORD_MAX_SPACES = 5; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java index cb2bfeb00f2..4ac7ed57631 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/GrokPatternCreator.java @@ -5,6 +5,8 @@ */ package org.elasticsearch.xpack.ml.filestructurefinder; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.grok.Grok; import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats; @@ -30,6 +32,7 @@ import java.util.stream.Collectors; */ public final class GrokPatternCreator { + private static final Logger logger = LogManager.getLogger(GrokPatternCreator.class); private static final Map PUNCTUATION_OR_SPACE_NEEDS_ESCAPING; static { HashMap punctuationOrSpaceNeedsEscaping = new HashMap<>(); @@ -498,7 +501,7 @@ public final class GrokPatternCreator { grok = new Grok(grokPatternDefinitions, "(?m)%{DATA:" + PREFACE + "}" + Objects.requireNonNull(preBreak) + "%{" + grokPatternName + ":" + VALUE + "}" + Objects.requireNonNull(postBreak) + "%{GREEDYDATA:" + EPILOGUE + "}", - TimeoutChecker.watchdog); + TimeoutChecker.watchdog, logger::warn); } @Override @@ -592,7 +595,7 @@ public final class GrokPatternCreator { throw new IllegalStateException("Cannot process KV matches until a field name has been determined"); } Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" + - fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog); + fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn); Collection values = new ArrayList<>(); for (String snippet : snippets) { Map captures = grok.captures(snippet); @@ -666,7 +669,7 @@ public final class GrokPatternCreator { private FullMatchGrokPatternCandidate(String grokPattern, String timeField, Map grokPatternDefinitions) { this.grokPattern = grokPattern; this.timeField = timeField; - grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog); + grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog, logger::warn); } public String getTimeField() { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java index be5b6e9adb1..16b81dae7a2 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java @@ -5,6 +5,8 @@ */ package org.elasticsearch.xpack.ml.filestructurefinder; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.collect.Tuple; @@ -48,6 +50,7 @@ public final class TimestampFormatFinder { private static final String PREFACE = "preface"; private static final String EPILOGUE = "epilogue"; + private static final Logger logger = LogManager.getLogger(TimestampFormatFinder.class); private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\|()[]{}^$.*?"; private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,"; private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?'; @@ -1400,8 +1403,9 @@ public final class TimestampFormatFinder { this.strictGrokPattern = Objects.requireNonNull(strictGrokPattern); // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern + - "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog); - this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog); + "%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn); + this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog, + logger::warn); this.outputGrokPatternName = Objects.requireNonNull(outputGrokPatternName); this.quickRuleOutBitSets = quickRuleOutPatterns.stream().map(TimestampFormatFinder::stringToNumberPosBitSet) .collect(Collectors.toList()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java index e611806682c..76b1c714115 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/GrokPatternCreator.java @@ -272,7 +272,7 @@ public final class GrokPatternCreator { this.grokPatternName = grokPatternName; this.fieldName = fieldName; this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" + - postBreak + "%{GREEDYDATA:" + EPILOGUE + "}"); + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn); } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java index 66b5470963e..34c9a047a99 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimeoutCheckerTests.java @@ -80,7 +80,7 @@ public class TimeoutCheckerTests extends FileStructureTestCase { } public void testGrokCaptures() throws Exception { - Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog); + Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn); TimeValue timeout = TimeValue.timeValueMillis(1); try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {