This extracts the configuration for extracting values from a groked string when building the grok expression. It does two things: 1. Creates a method exposing that configuration on `Grok` itself, which will be used by `grok` flavored runtime fields. 2. Marginally speeds up extracting grok values by skipping a little string manipulation.
This commit is contained in:
parent
fa13585fae
commit
7ffea4621d
|
@ -43,6 +43,8 @@ import java.util.Locale;
|
|||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static java.util.Collections.unmodifiableList;
|
||||
|
||||
public final class Grok {
|
||||
/**
|
||||
* Patterns built in to the grok library.
|
||||
|
@ -72,6 +74,7 @@ public final class Grok {
|
|||
private final boolean namedCaptures;
|
||||
private final Regex compiledExpression;
|
||||
private final MatcherWatchdog matcherWatchdog;
|
||||
private final List<GrokCaptureConfig> captureConfig;
|
||||
|
||||
public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
|
||||
this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
|
||||
|
@ -101,6 +104,12 @@ public final class Grok {
|
|||
byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
|
||||
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE,
|
||||
message -> logCallBack.accept(message));
|
||||
|
||||
List<GrokCaptureConfig> captureConfig = new ArrayList<>();
|
||||
for (Iterator<NameEntry> entry = compiledExpression.namedBackrefIterator(); entry.hasNext();) {
|
||||
captureConfig.add(new GrokCaptureConfig(entry.next()));
|
||||
}
|
||||
this.captureConfig = unmodifiableList(captureConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -146,7 +155,7 @@ public final class Grok {
|
|||
}
|
||||
}
|
||||
|
||||
public String groupMatch(String name, Region region, String pattern) {
|
||||
private String groupMatch(String name, Region region, String pattern) {
|
||||
try {
|
||||
int number = GROK_PATTERN_REGEX.nameToBackrefNumber(name.getBytes(StandardCharsets.UTF_8), 0,
|
||||
name.getBytes(StandardCharsets.UTF_8).length, region);
|
||||
|
@ -165,7 +174,7 @@ public final class Grok {
|
|||
*
|
||||
* @return named regex expression
|
||||
*/
|
||||
public String toRegex(String grokPattern) {
|
||||
protected String toRegex(String grokPattern) {
|
||||
StringBuilder res = new StringBuilder();
|
||||
for (int i = 0; i < MAX_TO_REGEX_ITERATIONS; i++) {
|
||||
byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);
|
||||
|
@ -255,19 +264,12 @@ public final class Grok {
|
|||
// TODO: I think we should throw an error here?
|
||||
return null;
|
||||
} else if (compiledExpression.numberOfNames() > 0) {
|
||||
Map<String, Object> fields = new HashMap<>();
|
||||
Map<String, Object> fields = new HashMap<>(captureConfig.size());
|
||||
Region region = matcher.getEagerRegion();
|
||||
for (Iterator<NameEntry> entry = compiledExpression.namedBackrefIterator(); entry.hasNext();) {
|
||||
NameEntry e = entry.next();
|
||||
String groupName = new String(e.name, e.nameP, e.nameEnd - e.nameP, StandardCharsets.UTF_8);
|
||||
for (int number : e.getBackRefs()) {
|
||||
if (region.beg[number] >= 0) {
|
||||
String matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number],
|
||||
StandardCharsets.UTF_8);
|
||||
GrokMatchGroup match = new GrokMatchGroup(groupName, matchValue);
|
||||
fields.put(match.getName(), match.getValue());
|
||||
break;
|
||||
}
|
||||
for (GrokCaptureConfig config: captureConfig) {
|
||||
Object v = config.extract(textAsBytes, region);
|
||||
if (v != null) {
|
||||
fields.put(config.name(), v);
|
||||
}
|
||||
}
|
||||
return fields;
|
||||
|
@ -276,6 +278,13 @@ public final class Grok {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The list of values that this {@linkplain Grok} can capture.
|
||||
*/
|
||||
public List<GrokCaptureConfig> captureConfig() {
|
||||
return captureConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load built-in patterns.
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.grok;
|
||||
|
||||
import org.joni.NameEntry;
|
||||
import org.joni.Region;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* Configuration for a value that {@link Grok} can capture.
|
||||
*/
|
||||
public final class GrokCaptureConfig {
|
||||
private final String name;
|
||||
private final GrokCaptureType type;
|
||||
private final int[] backRefs;
|
||||
|
||||
GrokCaptureConfig(NameEntry nameEntry) {
|
||||
String groupName = new String(nameEntry.name, nameEntry.nameP, nameEntry.nameEnd - nameEntry.nameP, StandardCharsets.UTF_8);
|
||||
String[] parts = groupName.split(":");
|
||||
name = parts.length >= 2 ? parts[1] : parts[0];
|
||||
type = parts.length == 3 ? GrokCaptureType.fromString(parts[2]) : GrokCaptureType.STRING;
|
||||
this.backRefs = nameEntry.getBackRefs();
|
||||
}
|
||||
|
||||
/**
|
||||
* The name defined for the field in the pattern.
|
||||
*/
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* The type defined for the field in the pattern.
|
||||
*/
|
||||
public GrokCaptureType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
Object extract(byte[] textAsBytes, Region region) {
|
||||
for (int number : backRefs) {
|
||||
if (region.beg[number] >= 0) {
|
||||
String matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number],
|
||||
StandardCharsets.UTF_8);
|
||||
return type.parse(matchValue);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.grok;
|
||||
|
||||
/**
 * The types that a field can be declared as in a grok pattern. Each
 * constant knows how to turn the captured text into a value of its type.
 */
public enum GrokCaptureType {
    /** Keeps the captured text unchanged. */
    STRING {
        @Override
        protected Object parseValue(String str) {
            return str;
        }
    },
    /** Parses the captured text as an {@link Integer}. */
    INTEGER {
        @Override
        protected Object parseValue(String str) {
            return Integer.valueOf(str);
        }
    },
    /** Parses the captured text as a {@link Long}. */
    LONG {
        @Override
        protected Object parseValue(String str) {
            return Long.valueOf(str);
        }
    },
    /** Parses the captured text as a {@link Double}. */
    DOUBLE {
        @Override
        protected Object parseValue(String str) {
            return Double.valueOf(str);
        }
    },
    /** Parses the captured text as a {@link Float}. */
    FLOAT {
        @Override
        protected Object parseValue(String str) {
            return Float.valueOf(str);
        }
    },
    /** Parses the captured text as a {@link Boolean}. */
    BOOLEAN {
        @Override
        protected Object parseValue(String str) {
            return Boolean.valueOf(str);
        }
    };

    /**
     * Null-tolerant entry point for parsing.
     *
     * @param str the captured text, may be {@code null}
     * @return {@code null} when {@code str} is {@code null}, otherwise the
     *         result of {@link #parseValue(String)}
     */
    final Object parse(String str) {
        return str == null ? null : parseValue(str);
    }

    /**
     * Converts non-null captured text into this type's value.
     *
     * @param str the captured text
     * @return the parsed value
     */
    protected abstract Object parseValue(String str);

    /**
     * Looks a type up by the name used for it in a pattern.
     *
     * @param str the type name; matching is case sensitive
     * @return the matching type, or {@link #STRING} for any unrecognized name
     */
    static GrokCaptureType fromString(String str) {
        if (str.equals("string")) {
            return STRING;
        }
        if (str.equals("int")) {
            return INTEGER;
        }
        if (str.equals("long")) {
            return LONG;
        }
        if (str.equals("double")) {
            return DOUBLE;
        }
        if (str.equals("float")) {
            return FLOAT;
        }
        if (str.equals("boolean")) {
            return BOOLEAN;
        }
        // Anything unrecognized is treated as plain text.
        return STRING;
    }
}
|
|
@ -1,68 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.grok;
|
||||
|
||||
/**
 * A single matched group: the group name split into its {@code :} separated
 * segments, together with the text that the group matched.
 */
final class GrokMatchGroup {
    private static final String DEFAULT_TYPE = "string";
    private final String patternName;
    private final String fieldName;
    private final String type;
    private final String groupValue;

    /**
     * @param groupName the group name, split on {@code :}; the first segment
     *                  is the pattern name, the second (if any) the field
     *                  name, and the third the type when there are exactly
     *                  three segments
     * @param groupValue the matched text, may be {@code null}
     */
    GrokMatchGroup(String groupName, String groupValue) {
        String[] segments = groupName.split(":");
        this.patternName = segments[0];
        this.fieldName = segments.length >= 2 ? segments[1] : null;
        this.type = segments.length == 3 ? segments[2] : DEFAULT_TYPE;
        this.groupValue = groupValue;
    }

    /**
     * @return the field name when one was given, otherwise the pattern name
     */
    public String getName() {
        if (fieldName != null) {
            return fieldName;
        }
        return patternName;
    }

    /**
     * @return the matched text converted to the declared type; {@code null}
     *         when there is no matched text, and the text itself when the
     *         type is not one of the recognized names
     */
    public Object getValue() {
        if (groupValue == null) {
            return null;
        }
        if (type.equals("int")) {
            return Integer.valueOf(groupValue);
        }
        if (type.equals("long")) {
            return Long.valueOf(groupValue);
        }
        if (type.equals("double")) {
            return Double.valueOf(groupValue);
        }
        if (type.equals("float")) {
            return Float.valueOf(groupValue);
        }
        if (type.equals("boolean")) {
            return Boolean.valueOf(groupValue);
        }
        return groupValue;
    }
}
|
|
@ -31,6 +31,12 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.BiConsumer;
|
||||
|
||||
import static org.elasticsearch.grok.GrokCaptureType.BOOLEAN;
|
||||
import static org.elasticsearch.grok.GrokCaptureType.DOUBLE;
|
||||
import static org.elasticsearch.grok.GrokCaptureType.FLOAT;
|
||||
import static org.elasticsearch.grok.GrokCaptureType.INTEGER;
|
||||
import static org.elasticsearch.grok.GrokCaptureType.LONG;
|
||||
import static org.elasticsearch.grok.GrokCaptureType.STRING;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
@ -53,6 +59,19 @@ public class GrokTests extends ESTestCase {
|
|||
public void testSimpleSyslogLine() {
|
||||
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{SYSLOGLINE}", logger::warn);
|
||||
assertCaptureConfig(
|
||||
grok,
|
||||
org.elasticsearch.common.collect.Map.ofEntries(
|
||||
org.elasticsearch.common.collect.Map.entry("facility", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("logsource", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("message", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("pid", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("priority", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("program", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("timestamp", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("timestamp8601", STRING)
|
||||
)
|
||||
);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("evita", matches.get("logsource"));
|
||||
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
|
||||
|
@ -65,6 +84,20 @@ public class GrokTests extends ESTestCase {
|
|||
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
|
||||
"Hello, syslog.";
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{SYSLOG5424LINE}", logger::warn);
|
||||
assertCaptureConfig(
|
||||
grok,
|
||||
org.elasticsearch.common.collect.Map.ofEntries(
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_app", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_host", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_msg", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_msgid", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_pri", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_proc", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_sd", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_ts", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog5424_ver", STRING)
|
||||
)
|
||||
);
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("191", matches.get("syslog5424_pri"));
|
||||
assertEquals("1", matches.get("syslog5424_ver"));
|
||||
|
@ -80,12 +113,14 @@ public class GrokTests extends ESTestCase {
|
|||
public void testDatePattern() {
|
||||
String line = "fancy 12-12-12 12:12:12";
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("timestamp", STRING));
|
||||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
|
||||
}
|
||||
|
||||
public void testNilCoercedValues() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("duration", FLOAT));
|
||||
Map<String, Object> matches = grok.captures("test 28.4ms");
|
||||
assertEquals(28.4f, matches.get("duration"));
|
||||
matches = grok.captures("test N/A");
|
||||
|
@ -94,6 +129,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testNilWithNoCoercion() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("duration", STRING));
|
||||
Map<String, Object> matches = grok.captures("test 28.4ms");
|
||||
assertEquals("28.4", matches.get("duration"));
|
||||
matches = grok.captures("test N/A");
|
||||
|
@ -104,6 +140,17 @@ public class GrokTests extends ESTestCase {
|
|||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
|
||||
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
|
||||
"%{GREEDYDATA:syslog_message}", logger::warn);
|
||||
assertCaptureConfig(
|
||||
grok,
|
||||
org.elasticsearch.common.collect.Map.ofEntries(
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_hostname", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_message", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_pid", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_pri", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_program", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("syslog_timestamp", STRING)
|
||||
)
|
||||
);
|
||||
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
|
||||
"SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 <20>all'): Envelope-from: " +
|
||||
"email@domain.no");
|
||||
|
@ -114,18 +161,21 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testNamedFieldsWithWholeTextMatch() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{DATE_EU:stimestamp}", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("stimestamp", STRING));
|
||||
Map<String, Object> matches = grok.captures("11/01/01");
|
||||
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
|
||||
}
|
||||
|
||||
public void testWithOniguramaNamedCaptures() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "(?<foo>\\w+)", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("foo", STRING));
|
||||
Map<String, Object> matches = grok.captures("hello world");
|
||||
assertThat(matches.get("foo"), equalTo("hello"));
|
||||
}
|
||||
|
||||
public void testISO8601() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "^%{TIMESTAMP_ISO8601}$", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of());
|
||||
List<String> timeMessages = Arrays.asList(
|
||||
"2001-01-01T00:00:00",
|
||||
"1974-03-02T04:09:09",
|
||||
|
@ -150,6 +200,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
public void testNotISO8601() {
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "^%{TIMESTAMP_ISO8601}$", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of());
|
||||
List<String> timeMessages = Arrays.asList(
|
||||
"2001-13-01T00:00:00", // invalid month
|
||||
"2001-00-01T00:00:00", // invalid month
|
||||
|
@ -189,6 +240,7 @@ public class GrokTests extends ESTestCase {
|
|||
String text = "wowza !!!Tal!!! - Tal";
|
||||
String pattern = "%{EXCITED_NAME} - %{NAME}";
|
||||
Grok g = new Grok(bank, pattern, false, logger::warn);
|
||||
assertCaptureConfig(g, org.elasticsearch.common.collect.Map.of("EXCITED_NAME_0", STRING, "NAME_21", STRING, "NAME_22", STRING));
|
||||
|
||||
assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
|
||||
assertEquals(true, g.match(text));
|
||||
|
@ -263,6 +315,7 @@ public class GrokTests extends ESTestCase {
|
|||
public void testBooleanCaptures() {
|
||||
String pattern = "%{WORD:name}=%{WORD:status:boolean}";
|
||||
Grok g = new Grok(Grok.BUILTIN_PATTERNS, pattern, logger::warn);
|
||||
assertCaptureConfig(g, org.elasticsearch.common.collect.Map.of("name", STRING, "status", BOOLEAN));
|
||||
|
||||
String text = "active=true";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -280,6 +333,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
|
||||
Grok g = new Grok(bank, pattern, logger::warn);
|
||||
assertCaptureConfig(g, org.elasticsearch.common.collect.Map.of("bytes", FLOAT, "id", LONG, "rating", DOUBLE));
|
||||
|
||||
String text = "12009.34 20000000000 4820.092";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -298,6 +352,7 @@ public class GrokTests extends ESTestCase {
|
|||
|
||||
String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
|
||||
Grok g = new Grok(bank, pattern, logger::warn);
|
||||
assertCaptureConfig(g, org.elasticsearch.common.collect.Map.of("bytes", FLOAT, "status", STRING));
|
||||
|
||||
String text = "12009.34 200 9032";
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
|
@ -308,11 +363,39 @@ public class GrokTests extends ESTestCase {
|
|||
assertEquals(expected, actual);
|
||||
}
|
||||
|
||||
public void testGarbageTypeNameBecomesString() {
|
||||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("BASE10NUM", "(?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))");
|
||||
bank.put("NUMBER", "(?:%{BASE10NUM})");
|
||||
|
||||
String pattern = "%{NUMBER:f:not_a_valid_type}";
|
||||
Grok g = new Grok(bank, pattern, logger::warn);
|
||||
assertCaptureConfig(g, org.elasticsearch.common.collect.Map.of("f", STRING));
|
||||
assertThat(g.captures("12009.34"), equalTo(org.elasticsearch.common.collect.Map.of("f", "12009.34")));
|
||||
}
|
||||
|
||||
public void testApacheLog() {
|
||||
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
|
||||
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
|
||||
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
|
||||
Grok grok = new Grok(Grok.BUILTIN_PATTERNS, "%{COMBINEDAPACHELOG}", logger::warn);
|
||||
assertCaptureConfig(
|
||||
grok,
|
||||
org.elasticsearch.common.collect.Map.ofEntries(
|
||||
org.elasticsearch.common.collect.Map.entry("agent", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("auth", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("bytes", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("clientip", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("httpversion", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("ident", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("rawrequest", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("referrer", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("request", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("response", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("timestamp", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("verb", STRING)
|
||||
)
|
||||
);
|
||||
Map<String, Object> matches = grok.captures(logLine);
|
||||
|
||||
assertEquals("31.184.238.164", matches.get("clientip"));
|
||||
|
@ -373,6 +456,22 @@ public class GrokTests extends ESTestCase {
|
|||
"HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";
|
||||
|
||||
Grok grok = new Grok(bank, pattern, logger::warn);
|
||||
assertCaptureConfig(
|
||||
grok,
|
||||
org.elasticsearch.common.collect.Map.ofEntries(
|
||||
org.elasticsearch.common.collect.Map.entry("agent", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("auth", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("bytes", INTEGER),
|
||||
org.elasticsearch.common.collect.Map.entry("clientip", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("httpversion", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("ident", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("referrer", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("request", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("response", INTEGER),
|
||||
org.elasticsearch.common.collect.Map.entry("timestamp", STRING),
|
||||
org.elasticsearch.common.collect.Map.entry("verb", STRING)
|
||||
)
|
||||
);
|
||||
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("clientip", "83.149.9.216");
|
||||
|
@ -404,6 +503,7 @@ public class GrokTests extends ESTestCase {
|
|||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("SINGLEDIGIT", "[0-9]");
|
||||
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);
|
||||
assertCaptureConfig(grok, org.elasticsearch.common.collect.Map.of("num", STRING));
|
||||
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("num", "1");
|
||||
|
@ -500,4 +600,13 @@ public class GrokTests extends ESTestCase {
|
|||
Map<String, Object> matches = grok.captures(line);
|
||||
assertEquals(line, matches.get(fieldName));
|
||||
}
|
||||
|
||||
private void assertCaptureConfig(Grok grok, Map<String, GrokCaptureType> nameToType) {
|
||||
Map<String, GrokCaptureType> fromGrok = new TreeMap<>();
|
||||
for (GrokCaptureConfig config : grok.captureConfig()) {
|
||||
Object old = fromGrok.put(config.name(), config.type());
|
||||
assertThat("duplicates not allowed", old, nullValue());
|
||||
}
|
||||
assertThat(fromGrok, equalTo(new TreeMap<>(nameToType)));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue