Fixes GrokProcessor's ignorance of named-captures with same name. (#22131)
Grok was originally ignoring potential matches to named-capture groups larger than one. For example, If you had two patterns containing the same named field, but only the second pattern matched, it would fail to pick this up. This PR fixes this by exploring all potential places where a named-capture was used and chooses the first one that matched. Fixes #22117.
This commit is contained in:
parent
9ad49faa86
commit
f56097b57a
|
@ -29,6 +29,7 @@ import org.joni.Syntax;
|
|||
import org.joni.exception.ValueException;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
|
@ -125,12 +126,25 @@ final class Grok {
|
|||
return grokPattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a specific text matches the defined grok expression.
|
||||
*
|
||||
* @param text the string to match
|
||||
* @return true if grok expression matches text, false otherwise.
|
||||
*/
|
||||
public boolean match(String text) {
|
||||
Matcher matcher = compiledExpression.matcher(text.getBytes(StandardCharsets.UTF_8));
|
||||
int result = matcher.search(0, text.length(), Option.DEFAULT);
|
||||
return (result != -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches and returns any named captures within a compiled grok expression that matched
|
||||
* within the provided text.
|
||||
*
|
||||
* @param text the text to match and extract values from.
|
||||
* @return a map containing field names and their respective coerced values that matched.
|
||||
*/
|
||||
public Map<String, Object> captures(String text) {
|
||||
byte[] textAsBytes = text.getBytes(StandardCharsets.UTF_8);
|
||||
Map<String, Object> fields = new HashMap<>();
|
||||
|
@ -140,16 +154,17 @@ final class Grok {
|
|||
Region region = matcher.getEagerRegion();
|
||||
for (Iterator<NameEntry> entry = compiledExpression.namedBackrefIterator(); entry.hasNext();) {
|
||||
NameEntry e = entry.next();
|
||||
int number = e.getBackRefs()[0];
|
||||
|
||||
String groupName = new String(e.name, e.nameP, e.nameEnd - e.nameP, StandardCharsets.UTF_8);
|
||||
String matchValue = null;
|
||||
if (region.beg[number] >= 0) {
|
||||
matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number],
|
||||
for (int number : e.getBackRefs()) {
|
||||
if (region.beg[number] >= 0) {
|
||||
String matchValue = new String(textAsBytes, region.beg[number], region.end[number] - region.beg[number],
|
||||
StandardCharsets.UTF_8);
|
||||
GrokMatchGroup match = new GrokMatchGroup(groupName, matchValue);
|
||||
fields.put(match.getName(), match.getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
GrokMatchGroup match = new GrokMatchGroup(groupName, matchValue);
|
||||
fields.put(match.getName(), match.getValue());
|
||||
|
||||
}
|
||||
return fields;
|
||||
} else if (result != -1) {
|
||||
|
|
|
@ -68,7 +68,6 @@ public final class GrokProcessor extends AbstractProcessor {
|
|||
}
|
||||
|
||||
matches.entrySet().stream()
|
||||
.filter((e) -> Objects.nonNull(e.getValue()))
|
||||
.forEach((e) -> ingestDocument.setFieldValue(e.getKey(), e.getValue()));
|
||||
|
||||
if (traceMatch) {
|
||||
|
@ -108,24 +107,20 @@ public final class GrokProcessor extends AbstractProcessor {
|
|||
static String combinePatterns(List<String> patterns, boolean traceMatch) {
|
||||
String combinedPattern;
|
||||
if (patterns.size() > 1) {
|
||||
if (traceMatch) {
|
||||
combinedPattern = "";
|
||||
for (int i = 0; i < patterns.size(); i++) {
|
||||
String valueWrap = "(?<" + PATTERN_MATCH_KEY + "." + i + ">" + patterns.get(i) + ")";
|
||||
if (combinedPattern.equals("")) {
|
||||
combinedPattern = valueWrap;
|
||||
} else {
|
||||
combinedPattern = combinedPattern + "|" + valueWrap;
|
||||
}
|
||||
combinedPattern = "";
|
||||
for (int i = 0; i < patterns.size(); i++) {
|
||||
String pattern = patterns.get(i);
|
||||
String valueWrap;
|
||||
if (traceMatch) {
|
||||
valueWrap = "(?<" + PATTERN_MATCH_KEY + "." + i + ">" + pattern + ")";
|
||||
} else {
|
||||
valueWrap = "(?:" + patterns.get(i) + ")";
|
||||
}
|
||||
if (combinedPattern.equals("")) {
|
||||
combinedPattern = valueWrap;
|
||||
} else {
|
||||
combinedPattern = combinedPattern + "|" + valueWrap;
|
||||
}
|
||||
} else {
|
||||
combinedPattern = patterns.stream().reduce("", (prefix, value) -> {
|
||||
if (prefix.equals("")) {
|
||||
return "(?:" + value + ")";
|
||||
} else {
|
||||
return prefix + "|" + "(?:" + value + ")";
|
||||
}
|
||||
});
|
||||
}
|
||||
} else {
|
||||
combinedPattern = patterns.get(0);
|
||||
|
|
|
@ -186,4 +186,45 @@ public class GrokProcessorTests extends ESTestCase {
|
|||
combined = GrokProcessor.combinePatterns(Arrays.asList("foo", "bar"), true);
|
||||
assertThat(combined, equalTo("(?<_ingest._grok_match_index.0>foo)|(?<_ingest._grok_match_index.1>bar)"));
|
||||
}
|
||||
|
||||
public void testCombineSamePatternNameAcrossPatterns() throws Exception {
|
||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||
IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||
doc.setFieldValue(fieldName, "1-3");
|
||||
Map<String, String> patternBank = new HashMap<>();
|
||||
patternBank.put("ONE", "1");
|
||||
patternBank.put("TWO", "2");
|
||||
patternBank.put("THREE", "3");
|
||||
GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank,
|
||||
Arrays.asList("%{ONE:first}-%{TWO:second}", "%{ONE:first}-%{THREE:second}"), fieldName, randomBoolean(), randomBoolean());
|
||||
processor.execute(doc);
|
||||
assertThat(doc.getFieldValue("first", String.class), equalTo("1"));
|
||||
assertThat(doc.getFieldValue("second", String.class), equalTo("3"));
|
||||
}
|
||||
|
||||
public void testFirstWinNamedCapture() throws Exception {
|
||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||
IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||
doc.setFieldValue(fieldName, "12");
|
||||
Map<String, String> patternBank = new HashMap<>();
|
||||
patternBank.put("ONETWO", "1|2");
|
||||
GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank,
|
||||
Collections.singletonList("%{ONETWO:first}%{ONETWO:first}"), fieldName, randomBoolean(), randomBoolean());
|
||||
processor.execute(doc);
|
||||
assertThat(doc.getFieldValue("first", String.class), equalTo("1"));
|
||||
}
|
||||
|
||||
public void testUnmatchedNamesNotIncludedInDocument() throws Exception {
|
||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||
IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||
doc.setFieldValue(fieldName, "3");
|
||||
Map<String, String> patternBank = new HashMap<>();
|
||||
patternBank.put("ONETWO", "1|2");
|
||||
patternBank.put("THREE", "3");
|
||||
GrokProcessor processor = new GrokProcessor(randomAsciiOfLength(10), patternBank,
|
||||
Collections.singletonList("%{ONETWO:first}|%{THREE:second}"), fieldName, randomBoolean(), randomBoolean());
|
||||
processor.execute(doc);
|
||||
assertFalse(doc.hasField("first"));
|
||||
assertThat(doc.getFieldValue("second", String.class), equalTo("3"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -307,4 +307,14 @@ public class GrokTests extends ESTestCase {
|
|||
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
|
||||
assertThat(grok.captures("nomatch"), nullValue());
|
||||
}
|
||||
|
||||
public void testMultipleNamedCapturesWithSameName() {
|
||||
Map<String, String> bank = new HashMap<>();
|
||||
bank.put("SINGLEDIGIT", "[0-9]");
|
||||
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
|
||||
|
||||
Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("num", "1");
|
||||
assertThat(grok.captures("12"), equalTo(expected));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue