When Joni, the regex engine that powers grok emits a warning it does so by default to System.err. System.err logs are all bucketed together in the server log at WARN level. When Joni emits a warning, it can be extremely verbose, logging a message for each execution again that pattern. For ingest node that means for every document that is run that through Grok. Fortunately, Joni provides a call back hook to push these warnings to a custom location. This commit implements Joni's callback hook to push the Joni warning to the Elasticsearch server logger (logger.org.elasticsearch.ingest.common.GrokProcessor) at debug level. Generally these warning indicate a possible issue with the regular expression and upon creation of the Grok processor will do a "test run" of the expression and log the result (if any) at WARN level. This WARN level log should only occur on pipeline creation which is a much lower frequency then every document. Additionally, the documentation is updated with instructions for how to set the logger to debug level.
This commit is contained in:
parent
aab6317260
commit
a370d5eead
|
@ -324,3 +324,23 @@ settings:
|
|||
| `ingest.grok.watchdog.interval` | 1s | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time.
|
||||
| `ingest.grok.watchdog.max_execution_time` | 1s | The maximum allowed execution of a grok expression evaluation.
|
||||
|======
|
||||
|
||||
[[grok-debugging]]
|
||||
==== Grok debugging
|
||||
|
||||
It is advised to use the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] to debug grok patterns. From there you can test one or more
|
||||
patterns in the UI against sample data. Under the covers it uses the same engine as ingest node processor.
|
||||
|
||||
Additionally, it is recommended to enable debug logging for Grok so that any additional messages may also be seen in the Elasticsearch
|
||||
server log.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT _cluster/settings
|
||||
{
|
||||
"transient": {
|
||||
"logger.org.elasticsearch.ingest.common.GrokProcessor": "debug"
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// NOTCONSOLE
|
||||
|
|
|
@ -41,6 +41,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
public final class Grok {
|
||||
|
||||
|
@ -77,19 +78,20 @@ public final class Grok {
|
|||
private final Regex compiledExpression;
|
||||
private final MatcherWatchdog matcherWatchdog;
|
||||
|
||||
public Grok(Map<String, String> patternBank, String grokPattern) {
|
||||
this(patternBank, grokPattern, true, MatcherWatchdog.noop());
|
||||
public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
|
||||
this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
|
||||
}
|
||||
|
||||
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog) {
|
||||
this(patternBank, grokPattern, true, matcherWatchdog);
|
||||
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
|
||||
this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
|
||||
}
|
||||
|
||||
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures) {
|
||||
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop());
|
||||
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
|
||||
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
|
||||
}
|
||||
|
||||
private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog) {
|
||||
private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog,
|
||||
Consumer<String> logCallBack) {
|
||||
this.patternBank = patternBank;
|
||||
this.namedCaptures = namedCaptures;
|
||||
this.matcherWatchdog = matcherWatchdog;
|
||||
|
@ -102,7 +104,8 @@ public final class Grok {
|
|||
|
||||
String expression = toRegex(grokPattern);
|
||||
byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
|
||||
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE);
|
||||
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE,
|
||||
message -> logCallBack.accept(message));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -28,8 +28,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.BiConsumer;
|
||||
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.nullValue;
|
||||
|
@ -40,19 +42,19 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testMatchWithoutCaptures() {
|
||||
String line = "value";
|
||||
Grok grok = new Grok(basePatterns, "value");
|
||||
Grok grok = new Grok(basePatterns, "value", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals(0, matches.size());
|
||||
}
|
||||
|
||||
public void testNoMatchingPatternInDictionary() {
|
||||
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}"));
|
||||
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn));
|
||||
assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary"));
|
||||
}
|
||||
|
||||
public void testSimpleSyslogLine() {
|
||||
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
|
||||
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}");
|
||||
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("evita", matches.get("logsource"));
|
||||
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
|
||||
|
@ -64,7 +66,7 @@ public class GrokTests extends ESTestCase {
|
|||
public void testSyslog5424Line() {
|
||||
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
|
||||
"Hello, syslog.";
|
||||
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}");
|
||||
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("191", matches.get("syslog5424_pri"));
|
||||
assertEquals("1", matches.get("syslog5424_ver"));
|
||||
|
@ -79,13 +81,13 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testDatePattern() {
|
||||
String line = "fancy 12-12-12 12:12:12";
|
||||
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})");
|
||||
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
|
||||
}
|
||||
|
||||
public void testNilCoercedValues() {
|
||||
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)");
|
||||
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("test 28.4ms");
|
||||
assertEquals(28.4f, matches.get("duration"));
|
||||
matches = grok.captures("test N/A");
|
||||
|
@ -93,7 +95,7 @@ public class GrokTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testNilWithNoCoercion() {
|
||||
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)");
|
||||
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("test 28.4ms");
|
||||
assertEquals("28.4", matches.get("duration"));
|
||||
matches = grok.captures("test N/A");
|
||||
|
@ -103,7 +105,7 @@ public class GrokTests extends ESTestCase {
|
|||
public void testUnicodeSyslog() {
|
||||
Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
|
||||
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
|
||||
"%{GREEDYDATA:syslog_message}");
|
||||
"%{GREEDYDATA:syslog_message}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
|
||||
"SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 <20>all'): Envelope-from: " +
|
||||
"email@domain.no");
|
||||
|
@ -113,19 +115,19 @@ public class GrokTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testNamedFieldsWithWholeTextMatch() {
|
||||
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}");
|
||||
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("11/01/01");
|
||||
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
|
||||
}
|
||||
|
||||
public void testWithOniguramaNamedCaptures() {
|
||||
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)");
|
||||
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("hello world");
|
||||
assertThat(matches.get("foo"), equalTo("hello"));
|
||||
}
|
||||
|
||||
public void testISO8601() {
|
||||
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
|
||||
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
|
||||
List<String> timeMessages = Arrays.asList(
|
||||
"2001-01-01T00:00:00",
|
||||
"1974-03-02T04:09:09",
|
||||
|
@ -149,7 +151,7 @@ public class GrokTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testNotISO8601() {
|
||||
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
|
||||
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
|
||||
List<String> timeMessages = Arrays.asList(
|
||||
"2001-13-01T00:00:00", // invalid month
|
||||
"2001-00-01T00:00:00", // invalid month
|
||||
|
@ -188,7 +190,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
String text = "wowza !!!Tal!!! - Tal";
|
||||
String pattern = "%{EXCITED_NAME} - %{NAME}";
|
||||
Grok g = new Grok(bank, pattern, false);
|
||||
Grok g = new Grok(bank, pattern, false, logger::warn);
|
||||
|
||||
assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
|
||||
assertEquals(true, g.match(text));
|
||||
|
@ -206,7 +208,7 @@ public class GrokTests extends ESTestCase {
|
|||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("NAME", "!!!%{NAME}!!!");
|
||||
String pattern = "%{NAME}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn);
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
|
||||
|
||||
|
@ -214,7 +216,7 @@ public class GrokTests extends ESTestCase {
|
|||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("NAME", "!!!%{NAME:name}!!!");
|
||||
String pattern = "%{NAME}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn);
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());
|
||||
|
||||
|
@ -222,7 +224,7 @@ public class GrokTests extends ESTestCase {
|
|||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("NAME", "!!!%{NAME:name:int}!!!");
|
||||
String pattern = "%{NAME}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn);
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());
|
||||
|
||||
|
@ -231,7 +233,7 @@ public class GrokTests extends ESTestCase {
|
|||
bank.put("NAME1", "!!!%{NAME2}!!!");
|
||||
bank.put("NAME2", "!!!%{NAME1}!!!");
|
||||
String pattern = "%{NAME1}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn);
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());
|
||||
|
||||
|
@ -241,7 +243,7 @@ public class GrokTests extends ESTestCase {
|
|||
bank.put("NAME2", "!!!%{NAME3}!!!");
|
||||
bank.put("NAME3", "!!!%{NAME1}!!!");
|
||||
String pattern = "%{NAME1}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn);
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]",
|
||||
e.getMessage());
|
||||
|
@ -254,7 +256,7 @@ public class GrokTests extends ESTestCase {
|
|||
bank.put("NAME4", "!!!%{NAME5}!!!");
|
||||
bank.put("NAME5", "!!!%{NAME1}!!!");
|
||||
String pattern = "%{NAME1}";
|
||||
new Grok(bank, pattern, false);
|
||||
new Grok(bank, pattern, false, logger::warn );
|
||||
});
|
||||
assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " +
|
||||
"via patterns [NAME2=>NAME3=>NAME4]", e.getMessage());
|
||||
|
@ -262,7 +264,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testBooleanCaptures() {
|
||||
String pattern = "%{WORD:name}=%{WORD:status:boolean}";
|
||||
Grok g = new Grok(basePatterns, pattern);
|
||||
Grok g = new Grok(basePatterns, pattern, logger::warn);
|
||||
|
||||
String text = "active=true";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -279,7 +281,7 @@ public class GrokTests extends ESTestCase {
|
|||
bank.put("NUMBER", "(?:%{BASE10NUM})");
|
||||
|
||||
String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
|
||||
Grok g = new Grok(bank, pattern);
|
||||
Grok g = new Grok(bank, pattern, logger::warn);
|
||||
|
||||
String text = "12009.34 20000000000 4820.092";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -297,7 +299,7 @@ public class GrokTests extends ESTestCase {
|
|||
bank.put("NUMBER", "(?:%{BASE10NUM})");
|
||||
|
||||
String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
|
||||
Grok g = new Grok(bank, pattern);
|
||||
Grok g = new Grok(bank, pattern, logger::warn);
|
||||
|
||||
String text = "12009.34 200 9032";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -312,7 +314,7 @@ public class GrokTests extends ESTestCase {
|
|||
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
|
||||
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
|
||||
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
|
||||
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}");
|
||||
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(logLine);
|
||||
|
||||
assertEquals("31.184.238.164", matches.get("clientip"));
|
||||
|
@ -372,7 +374,7 @@ public class GrokTests extends ESTestCase {
|
|||
String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " +
|
||||
"HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";
|
||||
|
||||
Grok grok = new Grok(bank, pattern);
|
||||
Grok grok = new Grok(bank, pattern, logger::warn);
|
||||
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("clientip", "83.149.9.216");
|
||||
|
@ -396,14 +398,14 @@ public class GrokTests extends ESTestCase {
|
|||
public void testNoMatch() {
|
||||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
|
||||
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
|
||||
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn);
|
||||
assertThat(grok.captures("nomatch"), nullValue());
|
||||
}
|
||||
|
||||
public void testMultipleNamedCapturesWithSameName() {
|
||||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("SINGLEDIGIT", "[0-9]");
|
||||
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
|
||||
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
|
||||
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("num", "1");
|
||||
|
@ -430,7 +432,8 @@ public class GrokTests extends ESTestCase {
|
|||
});
|
||||
t.start();
|
||||
};
|
||||
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler));
|
||||
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
|
||||
logger::warn);
|
||||
Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine));
|
||||
run.set(false);
|
||||
assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms"));
|
||||
|
@ -470,24 +473,32 @@ public class GrokTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testUnsupportedBracketsInFieldName() {
|
||||
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}");
|
||||
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures("line");
|
||||
assertNull(matches);
|
||||
}
|
||||
|
||||
public void testJavaClassPatternWithUnderscore() {
|
||||
Grok grok = new Grok(basePatterns, "%{JAVACLASS}");
|
||||
Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn);
|
||||
assertThat(grok.match("Test_Class.class"), is(true));
|
||||
}
|
||||
|
||||
public void testJavaFilePatternWithSpaces() {
|
||||
Grok grok = new Grok(basePatterns, "%{JAVAFILE}");
|
||||
Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn);
|
||||
assertThat(grok.match("Test Class.java"), is(true));
|
||||
}
|
||||
|
||||
public void testLogCallBack(){
|
||||
AtomicReference<String> message = new AtomicReference<>();
|
||||
Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set);
|
||||
grok.match("[foo]");
|
||||
//this message comes from Joni, so updates to Joni may change the expectation
|
||||
assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
|
||||
}
|
||||
|
||||
private void assertGrokedField(String fieldName) {
|
||||
String line = "foo";
|
||||
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}");
|
||||
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals(line, matches.get(fieldName));
|
||||
}
|
||||
|
|
|
@ -40,3 +40,16 @@ testClusters.integTest {
|
|||
// (this is because the integTest node is not using default distribution, but only the minimal number of required modules)
|
||||
module project(':modules:lang-mustache').tasks.bundlePlugin.archiveFile
|
||||
}
|
||||
|
||||
thirdPartyAudit.ignoreMissingClasses(
|
||||
// from log4j
|
||||
'org.osgi.framework.AdaptPermission',
|
||||
'org.osgi.framework.AdminPermission',
|
||||
'org.osgi.framework.Bundle',
|
||||
'org.osgi.framework.BundleActivator',
|
||||
'org.osgi.framework.BundleContext',
|
||||
'org.osgi.framework.BundleEvent',
|
||||
'org.osgi.framework.SynchronousBundleListener',
|
||||
'org.osgi.framework.wiring.BundleWire',
|
||||
'org.osgi.framework.wiring.BundleWiring'
|
||||
)
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.ingest.common;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.elasticsearch.grok.Grok;
|
||||
import org.elasticsearch.grok.MatcherWatchdog;
|
||||
import org.elasticsearch.ingest.AbstractProcessor;
|
||||
|
@ -36,6 +38,7 @@ public final class GrokProcessor extends AbstractProcessor {
|
|||
|
||||
public static final String TYPE = "grok";
|
||||
private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
|
||||
private static final Logger logger = LogManager.getLogger(GrokProcessor.class);
|
||||
|
||||
private final String matchField;
|
||||
private final List<String> matchPatterns;
|
||||
|
@ -48,9 +51,12 @@ public final class GrokProcessor extends AbstractProcessor {
|
|||
super(tag);
|
||||
this.matchField = matchField;
|
||||
this.matchPatterns = matchPatterns;
|
||||
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog);
|
||||
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug);
|
||||
this.traceMatch = traceMatch;
|
||||
this.ignoreMissing = ignoreMissing;
|
||||
// Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose
|
||||
// so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation.
|
||||
new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.util.set.Sets;
|
||||
import org.elasticsearch.grok.Grok;
|
||||
|
@ -27,6 +29,7 @@ import java.util.stream.Stream;
|
|||
|
||||
public final class FileStructureUtils {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(FileStructureUtils.class);
|
||||
public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
|
||||
public static final String MAPPING_TYPE_SETTING = "type";
|
||||
public static final String MAPPING_FORMAT_SETTING = "format";
|
||||
|
@ -61,10 +64,11 @@ public final class FileStructureUtils {
|
|||
private static final int NUM_TOP_HITS = 10;
|
||||
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
|
||||
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
|
||||
TimeoutChecker.watchdog);
|
||||
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog);
|
||||
private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog);
|
||||
private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog);
|
||||
TimeoutChecker.watchdog, logger::warn);
|
||||
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog, logger::warn);
|
||||
private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog, logger::warn);
|
||||
private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog,
|
||||
logger::warn);
|
||||
private static final int KEYWORD_MAX_LEN = 256;
|
||||
private static final int KEYWORD_MAX_SPACES = 5;
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.grok.Grok;
|
||||
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
|
||||
|
@ -30,6 +32,7 @@ import java.util.stream.Collectors;
|
|||
*/
|
||||
public final class GrokPatternCreator {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(GrokPatternCreator.class);
|
||||
private static final Map<Character, Boolean> PUNCTUATION_OR_SPACE_NEEDS_ESCAPING;
|
||||
static {
|
||||
HashMap<Character, Boolean> punctuationOrSpaceNeedsEscaping = new HashMap<>();
|
||||
|
@ -498,7 +501,7 @@ public final class GrokPatternCreator {
|
|||
grok = new Grok(grokPatternDefinitions,
|
||||
"(?m)%{DATA:" + PREFACE + "}" + Objects.requireNonNull(preBreak) +
|
||||
"%{" + grokPatternName + ":" + VALUE + "}" + Objects.requireNonNull(postBreak) + "%{GREEDYDATA:" + EPILOGUE + "}",
|
||||
TimeoutChecker.watchdog);
|
||||
TimeoutChecker.watchdog, logger::warn);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -592,7 +595,7 @@ public final class GrokPatternCreator {
|
|||
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
|
||||
}
|
||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
|
||||
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
|
||||
Collection<String> values = new ArrayList<>();
|
||||
for (String snippet : snippets) {
|
||||
Map<String, Object> captures = grok.captures(snippet);
|
||||
|
@ -666,7 +669,7 @@ public final class GrokPatternCreator {
|
|||
private FullMatchGrokPatternCandidate(String grokPattern, String timeField, Map<String, String> grokPatternDefinitions) {
|
||||
this.grokPattern = grokPattern;
|
||||
this.timeField = timeField;
|
||||
grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog);
|
||||
grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog, logger::warn);
|
||||
}
|
||||
|
||||
public String getTimeField() {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.SuppressForbidden;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
|
@ -48,6 +50,7 @@ public final class TimestampFormatFinder {
|
|||
private static final String PREFACE = "preface";
|
||||
private static final String EPILOGUE = "epilogue";
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(TimestampFormatFinder.class);
|
||||
private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\|()[]{}^$.*?";
|
||||
private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,";
|
||||
private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?';
|
||||
|
@ -1400,8 +1403,9 @@ public final class TimestampFormatFinder {
|
|||
this.strictGrokPattern = Objects.requireNonNull(strictGrokPattern);
|
||||
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
|
||||
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
|
||||
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
|
||||
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog);
|
||||
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
|
||||
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog,
|
||||
logger::warn);
|
||||
this.outputGrokPatternName = Objects.requireNonNull(outputGrokPatternName);
|
||||
this.quickRuleOutBitSets = quickRuleOutPatterns.stream().map(TimestampFormatFinder::stringToNumberPosBitSet)
|
||||
.collect(Collectors.toList());
|
||||
|
|
|
@ -272,7 +272,7 @@ public final class GrokPatternCreator {
|
|||
this.grokPatternName = grokPatternName;
|
||||
this.fieldName = fieldName;
|
||||
this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
|
||||
postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
|
||||
postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -80,7 +80,7 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
|
|||
}
|
||||
|
||||
public void testGrokCaptures() throws Exception {
|
||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog);
|
||||
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn);
|
||||
TimeValue timeout = TimeValue.timeValueMillis(1);
|
||||
try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {
|
||||
|
||||
|
|
Loading…
Reference in New Issue