[7.x] Ensure Joni warning are logged at debug (#57302) (#57897)

When Joni, the regex engine that powers grok emits a warning it
does so by default to System.err. System.err logs are all bucketed
together in the server log at WARN level. When Joni emits a warning,
it can be extremely verbose, logging a message for each execution
again that pattern. For ingest node that means for every document
that is run that through Grok. Fortunately, Joni provides a call
back hook to push these warnings to a custom location.

This commit implements Joni's callback hook to push the Joni warning
to the Elasticsearch server logger (logger.org.elasticsearch.ingest.common.GrokProcessor)
at debug level. Generally these warning indicate a possible issue with
the regular expression and upon creation of the Grok processor will
do a "test run" of the expression and log the result (if any) at WARN 
level. This WARN level log should only occur on pipeline creation which 
is a much lower frequency then every document. 

Additionally, the documentation is updated with instructions for how
to set the logger to debug level.
This commit is contained in:
Jake Landis 2020-06-09 17:06:29 -05:00 committed by GitHub
parent aab6317260
commit a370d5eead
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 115 additions and 51 deletions

View File

@ -324,3 +324,23 @@ settings:
| `ingest.grok.watchdog.interval` | 1s | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time.
| `ingest.grok.watchdog.max_execution_time` | 1s | The maximum allowed execution of a grok expression evaluation.
|======
[[grok-debugging]]
==== Grok debugging
It is advised to use the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] to debug grok patterns. From there you can test one or more
patterns in the UI against sample data. Under the covers it uses the same engine as ingest node processor.
Additionally, it is recommended to enable debug logging for Grok so that any additional messages may also be seen in the Elasticsearch
server log.
[source,js]
--------------------------------------------------
PUT _cluster/settings
{
"transient": {
"logger.org.elasticsearch.ingest.common.GrokProcessor": "debug"
}
}
--------------------------------------------------
// NOTCONSOLE

View File

@ -41,6 +41,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Consumer;
public final class Grok {
@ -77,19 +78,20 @@ public final class Grok {
private final Regex compiledExpression;
private final MatcherWatchdog matcherWatchdog;
public Grok(Map<String, String> patternBank, String grokPattern) {
this(patternBank, grokPattern, true, MatcherWatchdog.noop());
public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
}
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog) {
this(patternBank, grokPattern, true, matcherWatchdog);
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
}
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures) {
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop());
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
}
private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog) {
private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog,
Consumer<String> logCallBack) {
this.patternBank = patternBank;
this.namedCaptures = namedCaptures;
this.matcherWatchdog = matcherWatchdog;
@ -102,7 +104,8 @@ public final class Grok {
String expression = toRegex(grokPattern);
byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE);
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE,
message -> logCallBack.accept(message));
}
/**

View File

@ -28,8 +28,10 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
@ -40,19 +42,19 @@ public class GrokTests extends ESTestCase {
public void testMatchWithoutCaptures() {
String line = "value";
Grok grok = new Grok(basePatterns, "value");
Grok grok = new Grok(basePatterns, "value", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(0, matches.size());
}
public void testNoMatchingPatternInDictionary() {
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}"));
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn));
assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary"));
}
public void testSimpleSyslogLine() {
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}");
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("evita", matches.get("logsource"));
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
@ -64,7 +66,7 @@ public class GrokTests extends ESTestCase {
public void testSyslog5424Line() {
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
"Hello, syslog.";
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}");
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("191", matches.get("syslog5424_pri"));
assertEquals("1", matches.get("syslog5424_ver"));
@ -79,13 +81,13 @@ public class GrokTests extends ESTestCase {
public void testDatePattern() {
String line = "fancy 12-12-12 12:12:12";
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})");
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
}
public void testNilCoercedValues() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)");
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals(28.4f, matches.get("duration"));
matches = grok.captures("test N/A");
@ -93,7 +95,7 @@ public class GrokTests extends ESTestCase {
}
public void testNilWithNoCoercion() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)");
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals("28.4", matches.get("duration"));
matches = grok.captures("test N/A");
@ -103,7 +105,7 @@ public class GrokTests extends ESTestCase {
public void testUnicodeSyslog() {
Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
"%{GREEDYDATA:syslog_message}");
"%{GREEDYDATA:syslog_message}", logger::warn);
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
"SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 <20>all'): Envelope-from: " +
"email@domain.no");
@ -113,19 +115,19 @@ public class GrokTests extends ESTestCase {
}
public void testNamedFieldsWithWholeTextMatch() {
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}");
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn);
Map<String, Object> matches = grok.captures("11/01/01");
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
}
public void testWithOniguramaNamedCaptures() {
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)");
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)", logger::warn);
Map<String, Object> matches = grok.captures("hello world");
assertThat(matches.get("foo"), equalTo("hello"));
}
public void testISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-01-01T00:00:00",
"1974-03-02T04:09:09",
@ -149,7 +151,7 @@ public class GrokTests extends ESTestCase {
}
public void testNotISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-13-01T00:00:00", // invalid month
"2001-00-01T00:00:00", // invalid month
@ -188,7 +190,7 @@ public class GrokTests extends ESTestCase {
String text = "wowza !!!Tal!!! - Tal";
String pattern = "%{EXCITED_NAME} - %{NAME}";
Grok g = new Grok(bank, pattern, false);
Grok g = new Grok(bank, pattern, false, logger::warn);
assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
assertEquals(true, g.match(text));
@ -206,7 +208,7 @@ public class GrokTests extends ESTestCase {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());
@ -214,7 +216,7 @@ public class GrokTests extends ESTestCase {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME:name}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());
@ -222,7 +224,7 @@ public class GrokTests extends ESTestCase {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME:name:int}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());
@ -231,7 +233,7 @@ public class GrokTests extends ESTestCase {
bank.put("NAME1", "!!!%{NAME2}!!!");
bank.put("NAME2", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());
@ -241,7 +243,7 @@ public class GrokTests extends ESTestCase {
bank.put("NAME2", "!!!%{NAME3}!!!");
bank.put("NAME3", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]",
e.getMessage());
@ -254,7 +256,7 @@ public class GrokTests extends ESTestCase {
bank.put("NAME4", "!!!%{NAME5}!!!");
bank.put("NAME5", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn );
});
assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " +
"via patterns [NAME2=>NAME3=>NAME4]", e.getMessage());
@ -262,7 +264,7 @@ public class GrokTests extends ESTestCase {
public void testBooleanCaptures() {
String pattern = "%{WORD:name}=%{WORD:status:boolean}";
Grok g = new Grok(basePatterns, pattern);
Grok g = new Grok(basePatterns, pattern, logger::warn);
String text = "active=true";
Map<String, Object> expected = new HashMap<>();
@ -279,7 +281,7 @@ public class GrokTests extends ESTestCase {
bank.put("NUMBER", "(?:%{BASE10NUM})");
String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
Grok g = new Grok(bank, pattern);
Grok g = new Grok(bank, pattern, logger::warn);
String text = "12009.34 20000000000 4820.092";
Map<String, Object> expected = new HashMap<>();
@ -297,7 +299,7 @@ public class GrokTests extends ESTestCase {
bank.put("NUMBER", "(?:%{BASE10NUM})");
String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
Grok g = new Grok(bank, pattern);
Grok g = new Grok(bank, pattern, logger::warn);
String text = "12009.34 200 9032";
Map<String, Object> expected = new HashMap<>();
@ -312,7 +314,7 @@ public class GrokTests extends ESTestCase {
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}");
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn);
Map<String, Object> matches = grok.captures(logLine);
assertEquals("31.184.238.164", matches.get("clientip"));
@ -372,7 +374,7 @@ public class GrokTests extends ESTestCase {
String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " +
"HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";
Grok grok = new Grok(bank, pattern);
Grok grok = new Grok(bank, pattern, logger::warn);
Map<String, Object> expected = new HashMap<>();
expected.put("clientip", "83.149.9.216");
@ -396,14 +398,14 @@ public class GrokTests extends ESTestCase {
public void testNoMatch() {
Map<String, String> bank = new HashMap<>();
bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn);
assertThat(grok.captures("nomatch"), nullValue());
}
public void testMultipleNamedCapturesWithSameName() {
Map<String, String> bank = new HashMap<>();
bank.put("SINGLEDIGIT", "[0-9]");
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
Map<String, Object> expected = new HashMap<>();
expected.put("num", "1");
@ -430,7 +432,8 @@ public class GrokTests extends ESTestCase {
});
t.start();
};
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler));
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
logger::warn);
Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine));
run.set(false);
assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms"));
@ -470,24 +473,32 @@ public class GrokTests extends ESTestCase {
}
public void testUnsupportedBracketsInFieldName() {
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}");
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn);
Map<String, Object> matches = grok.captures("line");
assertNull(matches);
}
public void testJavaClassPatternWithUnderscore() {
Grok grok = new Grok(basePatterns, "%{JAVACLASS}");
Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn);
assertThat(grok.match("Test_Class.class"), is(true));
}
public void testJavaFilePatternWithSpaces() {
Grok grok = new Grok(basePatterns, "%{JAVAFILE}");
Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn);
assertThat(grok.match("Test Class.java"), is(true));
}
public void testLogCallBack(){
AtomicReference<String> message = new AtomicReference<>();
Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set);
grok.match("[foo]");
//this message comes from Joni, so updates to Joni may change the expectation
assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
}
private void assertGrokedField(String fieldName) {
String line = "foo";
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}");
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(line, matches.get(fieldName));
}

View File

@ -40,3 +40,16 @@ testClusters.integTest {
// (this is because the integTest node is not using default distribution, but only the minimal number of required modules)
module project(':modules:lang-mustache').tasks.bundlePlugin.archiveFile
}
thirdPartyAudit.ignoreMissingClasses(
// from log4j
'org.osgi.framework.AdaptPermission',
'org.osgi.framework.AdminPermission',
'org.osgi.framework.Bundle',
'org.osgi.framework.BundleActivator',
'org.osgi.framework.BundleContext',
'org.osgi.framework.BundleEvent',
'org.osgi.framework.SynchronousBundleListener',
'org.osgi.framework.wiring.BundleWire',
'org.osgi.framework.wiring.BundleWiring'
)

View File

@ -19,6 +19,8 @@
package org.elasticsearch.ingest.common;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.grok.Grok;
import org.elasticsearch.grok.MatcherWatchdog;
import org.elasticsearch.ingest.AbstractProcessor;
@ -36,6 +38,7 @@ public final class GrokProcessor extends AbstractProcessor {
public static final String TYPE = "grok";
private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
private static final Logger logger = LogManager.getLogger(GrokProcessor.class);
private final String matchField;
private final List<String> matchPatterns;
@ -48,9 +51,12 @@ public final class GrokProcessor extends AbstractProcessor {
super(tag);
this.matchField = matchField;
this.matchPatterns = matchPatterns;
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog);
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug);
this.traceMatch = traceMatch;
this.ignoreMissing = ignoreMissing;
// Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose
// so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation.
new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___");
}
@Override

View File

@ -5,6 +5,8 @@
*/
package org.elasticsearch.xpack.ml.filestructurefinder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.grok.Grok;
@ -27,6 +29,7 @@ import java.util.stream.Stream;
public final class FileStructureUtils {
private static final Logger logger = LogManager.getLogger(FileStructureUtils.class);
public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
public static final String MAPPING_TYPE_SETTING = "type";
public static final String MAPPING_FORMAT_SETTING = "format";
@ -61,10 +64,11 @@ public final class FileStructureUtils {
private static final int NUM_TOP_HITS = 10;
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
TimeoutChecker.watchdog);
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog);
private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog);
private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog);
TimeoutChecker.watchdog, logger::warn);
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog, logger::warn);
private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog, logger::warn);
private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog,
logger::warn);
private static final int KEYWORD_MAX_LEN = 256;
private static final int KEYWORD_MAX_SPACES = 5;

View File

@ -5,6 +5,8 @@
*/
package org.elasticsearch.xpack.ml.filestructurefinder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.grok.Grok;
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
@ -30,6 +32,7 @@ import java.util.stream.Collectors;
*/
public final class GrokPatternCreator {
private static final Logger logger = LogManager.getLogger(GrokPatternCreator.class);
private static final Map<Character, Boolean> PUNCTUATION_OR_SPACE_NEEDS_ESCAPING;
static {
HashMap<Character, Boolean> punctuationOrSpaceNeedsEscaping = new HashMap<>();
@ -498,7 +501,7 @@ public final class GrokPatternCreator {
grok = new Grok(grokPatternDefinitions,
"(?m)%{DATA:" + PREFACE + "}" + Objects.requireNonNull(preBreak) +
"%{" + grokPatternName + ":" + VALUE + "}" + Objects.requireNonNull(postBreak) + "%{GREEDYDATA:" + EPILOGUE + "}",
TimeoutChecker.watchdog);
TimeoutChecker.watchdog, logger::warn);
}
@Override
@ -592,7 +595,7 @@ public final class GrokPatternCreator {
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
}
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
Collection<String> values = new ArrayList<>();
for (String snippet : snippets) {
Map<String, Object> captures = grok.captures(snippet);
@ -666,7 +669,7 @@ public final class GrokPatternCreator {
private FullMatchGrokPatternCandidate(String grokPattern, String timeField, Map<String, String> grokPatternDefinitions) {
this.grokPattern = grokPattern;
this.timeField = timeField;
grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog);
grok = new Grok(grokPatternDefinitions, grokPattern, TimeoutChecker.watchdog, logger::warn);
}
public String getTimeField() {

View File

@ -5,6 +5,8 @@
*/
package org.elasticsearch.xpack.ml.filestructurefinder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.collect.Tuple;
@ -48,6 +50,7 @@ public final class TimestampFormatFinder {
private static final String PREFACE = "preface";
private static final String EPILOGUE = "epilogue";
private static final Logger logger = LogManager.getLogger(TimestampFormatFinder.class);
private static final String PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX = "\\|()[]{}^$.*?";
private static final String FRACTIONAL_SECOND_SEPARATORS = ":.,";
private static final char INDETERMINATE_FIELD_PLACEHOLDER = '?';
@ -1400,8 +1403,9 @@ public final class TimestampFormatFinder {
this.strictGrokPattern = Objects.requireNonNull(strictGrokPattern);
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog);
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog);
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog,
logger::warn);
this.outputGrokPatternName = Objects.requireNonNull(outputGrokPatternName);
this.quickRuleOutBitSets = quickRuleOutPatterns.stream().map(TimestampFormatFinder::stringToNumberPosBitSet)
.collect(Collectors.toList());

View File

@ -272,7 +272,7 @@ public final class GrokPatternCreator {
this.grokPatternName = grokPatternName;
this.fieldName = fieldName;
this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
postBreak + "%{GREEDYDATA:" + EPILOGUE + "}");
postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn);
}
}
}

View File

@ -80,7 +80,7 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
}
public void testGrokCaptures() throws Exception {
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog);
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn);
TimeValue timeout = TimeValue.timeValueMillis(1);
try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {