Rename grok's built-in patterns (backport of #62735) (#62765)

This reworks the code around grok's built-in patterns to name things
more like the rest of the code. It's not a big deal, but I'm just more
used to having `public static final` constants in SHOUTING_SNAKE_CASE.
This commit is contained in:
Nik Everett 2020-09-22 13:06:43 -04:00 committed by GitHub
parent c995e73c6d
commit 39a617773d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 52 additions and 59 deletions

View File

@ -32,7 +32,6 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
@ -45,6 +44,10 @@ import java.util.Map;
import java.util.function.Consumer;
public final class Grok {
/**
* Patterns built in to the grok library.
*/
public static final Map<String, String> BUILTIN_PATTERNS = loadBuiltinPatterns();
private static final String NAME_GROUP = "name";
private static final String SUBNAME_GROUP = "subname";
@ -63,17 +66,8 @@ public final class Grok {
private static final Regex GROK_PATTERN_REGEX = new Regex(GROK_PATTERN.getBytes(StandardCharsets.UTF_8), 0,
GROK_PATTERN.getBytes(StandardCharsets.UTF_8).length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.DEFAULT);
private static final Map<String, String> builtinPatterns;
private static final int MAX_TO_REGEX_ITERATIONS = 100_000; //sanity limit
static {
try {
builtinPatterns = loadBuiltinPatterns();
} catch (IOException e) {
throw new UncheckedIOException("unable to load built-in grok patterns", e);
}
}
private final Map<String, String> patternBank;
private final boolean namedCaptures;
private final Regex compiledExpression;
@ -282,21 +276,23 @@ public final class Grok {
}
}
public static Map<String, String> getBuiltinPatterns() {
return builtinPatterns;
}
private static Map<String, String> loadBuiltinPatterns() throws IOException {
// Code for loading built-in grok patterns packaged with the jar file:
String[] PATTERN_NAMES = new String[] {
/**
* Load built-in patterns.
*/
private static Map<String, String> loadBuiltinPatterns() {
String[] patternNames = new String[] {
"aws", "bacula", "bind", "bro", "exim", "firewalls", "grok-patterns", "haproxy",
"java", "junos", "linux-syslog", "maven", "mcollective-patterns", "mongodb", "nagios",
"postgresql", "rails", "redis", "ruby", "squid"
};
Map<String, String> builtinPatterns = new LinkedHashMap<>();
for (String pattern : PATTERN_NAMES) {
try(InputStream is = Grok.class.getResourceAsStream("/patterns/" + pattern)) {
loadPatterns(builtinPatterns, is);
for (String pattern : patternNames) {
try {
try(InputStream is = Grok.class.getResourceAsStream("/patterns/" + pattern)) {
loadPatterns(builtinPatterns, is);
}
} catch (IOException e) {
throw new RuntimeException("failed to load built-in patterns", e);
}
}
return Collections.unmodifiableMap(builtinPatterns);

View File

@ -38,11 +38,9 @@ import static org.hamcrest.Matchers.nullValue;
public class GrokTests extends ESTestCase {
private static final Map<String, String> basePatterns = Grok.getBuiltinPatterns();
public void testMatchWithoutCaptures() {
String line = "value";
Grok grok = new Grok(basePatterns, "value", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "value", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(0, matches.size());
}
@ -54,7 +52,7 @@ public class GrokTests extends ESTestCase {
public void testSimpleSyslogLine() {
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{SYSLOGLINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("evita", matches.get("logsource"));
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
@ -66,7 +64,7 @@ public class GrokTests extends ESTestCase {
public void testSyslog5424Line() {
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
"Hello, syslog.";
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{SYSLOG5424LINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("191", matches.get("syslog5424_pri"));
assertEquals("1", matches.get("syslog5424_ver"));
@ -81,13 +79,13 @@ public class GrokTests extends ESTestCase {
public void testDatePattern() {
String line = "fancy 12-12-12 12:12:12";
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
}
public void testNilCoercedValues() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals(28.4f, matches.get("duration"));
matches = grok.captures("test N/A");
@ -95,7 +93,7 @@ public class GrokTests extends ESTestCase {
}
public void testNilWithNoCoercion() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals("28.4", matches.get("duration"));
matches = grok.captures("test N/A");
@ -103,7 +101,7 @@ public class GrokTests extends ESTestCase {
}
public void testUnicodeSyslog() {
Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
"%{GREEDYDATA:syslog_message}", logger::warn);
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
@ -115,19 +113,19 @@ public class GrokTests extends ESTestCase {
}
public void testNamedFieldsWithWholeTextMatch() {
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{DATE_EU:stimestamp}", logger::warn);
Map<String, Object> matches = grok.captures("11/01/01");
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
}
public void testWithOniguramaNamedCaptures() {
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "(?<foo>\\w+)", logger::warn);
Map<String, Object> matches = grok.captures("hello world");
assertThat(matches.get("foo"), equalTo("hello"));
}
public void testISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-01-01T00:00:00",
"1974-03-02T04:09:09",
@ -151,7 +149,7 @@ public class GrokTests extends ESTestCase {
}
public void testNotISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-13-01T00:00:00", // invalid month
"2001-00-01T00:00:00", // invalid month
@ -264,7 +262,7 @@ public class GrokTests extends ESTestCase {
public void testBooleanCaptures() {
String pattern = "%{WORD:name}=%{WORD:status:boolean}";
Grok g = new Grok(basePatterns, pattern, logger::warn);
Grok g = new Grok(Grok.BUILTIN_PATTERNS, pattern, logger::warn);
String text = "active=true";
Map<String, Object> expected = new HashMap<>();
@ -314,7 +312,7 @@ public class GrokTests extends ESTestCase {
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{COMBINEDAPACHELOG}", logger::warn);
Map<String, Object> matches = grok.captures(logLine);
assertEquals("31.184.238.164", matches.get("clientip"));
@ -432,7 +430,7 @@ public class GrokTests extends ESTestCase {
});
t.start();
};
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
logger::warn);
Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine));
run.set(false);
@ -473,24 +471,24 @@ public class GrokTests extends ESTestCase {
}
public void testUnsupportedBracketsInFieldName() {
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{WORD:unsuppo(r)ted}", logger::warn);
Map<String, Object> matches = grok.captures("line");
assertNull(matches);
}
public void testJavaClassPatternWithUnderscore() {
Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{JAVACLASS}", logger::warn);
assertThat(grok.match("Test_Class.class"), is(true));
}
public void testJavaFilePatternWithSpaces() {
Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{JAVAFILE}", logger::warn);
assertThat(grok.match("Test Class.java"), is(true));
}
public void testLogCallBack(){
AtomicReference<String> message = new AtomicReference<>();
Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, ".*\\[.*%{SPACE}*\\].*", message::set);
grok.match("[foo]");
//this message comes from Joni, so updates to Joni may change the expectation
assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
@ -498,7 +496,7 @@ public class GrokTests extends ESTestCase {
private void assertGrokedField(String fieldName) {
String line = "foo";
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{WORD:" + fieldName + "}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(line, matches.get(fieldName));
}

View File

@ -32,6 +32,7 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.grok.Grok;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.RestToXContentListener;
@ -44,7 +45,6 @@ import java.util.Map;
import java.util.TreeMap;
import static java.util.Collections.singletonList;
import static org.elasticsearch.ingest.common.IngestCommonPlugin.GROK_PATTERNS;
import static org.elasticsearch.rest.RestRequest.Method.GET;
public class GrokProcessorGetAction extends ActionType<GrokProcessorGetAction.Response> {
@ -125,7 +125,7 @@ public class GrokProcessorGetAction extends ActionType<GrokProcessorGetAction.Re
@Inject
public TransportAction(TransportService transportService, ActionFilters actionFilters) {
this(transportService, actionFilters, GROK_PATTERNS);
this(transportService, actionFilters, Grok.BUILTIN_PATTERNS);
}
// visible for testing

View File

@ -49,7 +49,6 @@ import java.util.function.Supplier;
public class IngestCommonPlugin extends Plugin implements ActionPlugin, IngestPlugin {
static final Map<String, String> GROK_PATTERNS = Grok.getBuiltinPatterns();
static final Setting<TimeValue> WATCHDOG_INTERVAL =
Setting.timeSetting("ingest.grok.watchdog.interval", TimeValue.timeValueSeconds(1), Setting.Property.NodeScope);
static final Setting<TimeValue> WATCHDOG_MAX_EXECUTION_TIME =
@ -77,7 +76,7 @@ public class IngestCommonPlugin extends Plugin implements ActionPlugin, IngestPl
processors.put(ForEachProcessor.TYPE, new ForEachProcessor.Factory(parameters.scriptService));
processors.put(DateIndexNameProcessor.TYPE, new DateIndexNameProcessor.Factory(parameters.scriptService));
processors.put(SortProcessor.TYPE, new SortProcessor.Factory());
processors.put(GrokProcessor.TYPE, new GrokProcessor.Factory(GROK_PATTERNS, createGrokThreadWatchdog(parameters)));
processors.put(GrokProcessor.TYPE, new GrokProcessor.Factory(Grok.BUILTIN_PATTERNS, createGrokThreadWatchdog(parameters)));
processors.put(ScriptProcessor.TYPE, new ScriptProcessor.Factory(parameters.scriptService));
processors.put(DotExpanderProcessor.TYPE, new DotExpanderProcessor.Factory());
processors.put(JsonProcessor.TYPE, new JsonProcessor.Factory());

View File

@ -58,15 +58,15 @@ public final class FileStructureUtils {
"(?:%{WKT_POINT}|%{WKT_LINESTRING}|%{WKT_MULTIPOINT}|%{WKT_POLYGON}|%{WKT_MULTILINESTRING}|%{WKT_MULTIPOLYGON}|%{WKT_BBOX})"
);
patterns.put("WKT_GEOMETRYCOLLECTION", "GEOMETRYCOLLECTION \\(%{WKT_ANY}(?:, %{WKT_ANY})\\)");
patterns.putAll(Grok.getBuiltinPatterns());
patterns.putAll(Grok.BUILTIN_PATTERNS);
EXTENDED_PATTERNS = Collections.unmodifiableMap(patterns);
}
private static final int NUM_TOP_HITS = 10;
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
private static final Grok NUMBER_GROK = new Grok(Grok.BUILTIN_PATTERNS, "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$",
TimeoutChecker.watchdog, logger::warn);
private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$", TimeoutChecker.watchdog, logger::warn);
private static final Grok IP_GROK = new Grok(Grok.BUILTIN_PATTERNS, "^%{IP}$", TimeoutChecker.watchdog, logger::warn);
private static final Grok GEO_POINT_WKT = new Grok(EXTENDED_PATTERNS, "^%{WKT_POINT}$", TimeoutChecker.watchdog, logger::warn);
private static final Grok GEO_WKT = new Grok(EXTENDED_PATTERNS, "^(?:%{WKT_ANY}|%{WKT_GEOMETRYCOLLECTION})$", TimeoutChecker.watchdog,
logger::warn);

View File

@ -150,9 +150,9 @@ public final class GrokPatternCreator {
this.mappings = mappings;
this.fieldStats = fieldStats;
if (customGrokPatternDefinitions.isEmpty()) {
grokPatternDefinitions = Grok.getBuiltinPatterns();
grokPatternDefinitions = Grok.BUILTIN_PATTERNS;
} else {
grokPatternDefinitions = new HashMap<>(Grok.getBuiltinPatterns());
grokPatternDefinitions = new HashMap<>(Grok.BUILTIN_PATTERNS);
grokPatternDefinitions.putAll(customGrokPatternDefinitions);
}
this.timeoutChecker = Objects.requireNonNull(timeoutChecker);
@ -457,7 +457,7 @@ public final class GrokPatternCreator {
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName) {
this(grokPatternName, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType), fieldName,
"\\b", "\\b", Grok.getBuiltinPatterns());
"\\b", "\\b", Grok.BUILTIN_PATTERNS);
}
/**
@ -481,7 +481,7 @@ public final class GrokPatternCreator {
*/
ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName, String preBreak, String postBreak) {
this(grokPatternName, Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, mappingType), fieldName,
preBreak, postBreak, Grok.getBuiltinPatterns());
preBreak, postBreak, Grok.BUILTIN_PATTERNS);
}
/**
@ -594,7 +594,7 @@ public final class GrokPatternCreator {
if (fieldName == null) {
throw new IllegalStateException("Cannot process KV matches until a field name has been determined");
}
Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" +
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "(?m)%{DATA:" + PREFACE + "}\\b" +
fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
Collection<String> values = new ArrayList<>();
for (String snippet : snippets) {
@ -649,7 +649,7 @@ public final class GrokPatternCreator {
private final Grok grok;
static FullMatchGrokPatternCandidate fromGrokPatternName(String grokPatternName, String timeField) {
return new FullMatchGrokPatternCandidate("%{" + grokPatternName + "}", timeField, Grok.getBuiltinPatterns());
return new FullMatchGrokPatternCandidate("%{" + grokPatternName + "}", timeField, Grok.BUILTIN_PATTERNS);
}
static FullMatchGrokPatternCandidate fromGrokPatternName(String grokPatternName, String timeField,
@ -658,7 +658,7 @@ public final class GrokPatternCreator {
}
static FullMatchGrokPatternCandidate fromGrokPattern(String grokPattern, String timeField) {
return new FullMatchGrokPatternCandidate(grokPattern, timeField, Grok.getBuiltinPatterns());
return new FullMatchGrokPatternCandidate(grokPattern, timeField, Grok.BUILTIN_PATTERNS);
}
static FullMatchGrokPatternCandidate fromGrokPattern(String grokPattern, String timeField,

View File

@ -1466,9 +1466,9 @@ public final class TimestampFormatFinder {
this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE);
this.strictGrokPattern = Objects.requireNonNull(strictGrokPattern);
// The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java
this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
this.strictSearchGrok = new Grok(Grok.BUILTIN_PATTERNS, "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern +
"%{GREEDYDATA:" + EPILOGUE + "}", TimeoutChecker.watchdog, logger::warn);
this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), "^" + strictGrokPattern + "$", TimeoutChecker.watchdog,
this.strictFullMatchGrok = new Grok(Grok.BUILTIN_PATTERNS, "^" + strictGrokPattern + "$", TimeoutChecker.watchdog,
logger::warn);
this.outputGrokPatternName = Objects.requireNonNull(outputGrokPatternName);
this.quickRuleOutBitSets = quickRuleOutPatterns.stream().map(TimestampFormatFinder::stringToNumberPosBitSet)

View File

@ -271,7 +271,7 @@ public final class GrokPatternCreator {
GrokPatternCandidate(String grokPatternName, String fieldName, String preBreak, String postBreak) {
this.grokPatternName = grokPatternName;
this.fieldName = fieldName;
this.grok = new Grok(Grok.getBuiltinPatterns(), "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
this.grok = new Grok(Grok.BUILTIN_PATTERNS, "%{DATA:" + PREFACE + "}" + preBreak + "%{" + grokPatternName + ":this}" +
postBreak + "%{GREEDYDATA:" + EPILOGUE + "}", logger::warn);
}
}

View File

@ -78,7 +78,7 @@ public class TimeoutCheckerTests extends FileStructureTestCase {
}
public void testGrokCaptures() throws Exception {
Grok grok = new Grok(Grok.getBuiltinPatterns(), "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn);
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "{%DATA:data}{%GREEDYDATA:greedydata}", TimeoutChecker.watchdog, logger::warn);
TimeValue timeout = TimeValue.timeValueMillis(1);
try (TimeoutChecker timeoutChecker = new TimeoutChecker("grok captures test", timeout, scheduler)) {