mirror of
synced 2025-03-09 14:34:43 +00:00
Introduce the dissect library (#32297)
The dissect library will be used by the ingest node as an alternative to Grok to split a string based on a pattern. Dissect differs from Grok in that regular expressions are not used to split the string. Note - Regular expressions are used during construction of the objects, but not in the hot path. A dissect pattern takes the form of: '%{a} %{b},%{c}' which is composed of 3 keys (a,b,c) and two delimiters (space and comma). This dissect pattern will match a string of the form: 'foo bar,baz' and will result in a key/value pairing of 'a=foo, b=bar, and c=baz'. See the comments in DissectParser for a full explanation. This commit does not include the ingest node processor that will consume it. However, the consumption should be a trivial mapping between the key/value pairing returned by the parser and the key/value pairing needed for the IngestDocument.
This commit is contained in:
Normal file
Normal file
@ -0,0 +1,50 @@
import org.elasticsearch.gradle.precommit.PrecommitTasks
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
archivesBaseName = 'elasticsearch-dissect'
dependencies {
if (isEclipse == false || project.path == ":libs:dissect-tests") {
testCompile("org.elasticsearch.test:framework:${version}") {
exclude group: 'org.elasticsearch', module: 'dissect'
testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
forbiddenApisMain {
signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')]
if (isEclipse) {
// in eclipse the project is under a fake root, we need to change around the source sets
sourceSets {
if (project.path == ":libs:dissect") {
main.java.srcDirs = ['java']
main.resources.srcDirs = ['resources']
} else {
test.java.srcDirs = ['java']
test.resources.srcDirs = ['resources']
Normal file
Normal file
@ -0,0 +1,3 @@
// this is just a shell gradle file for eclipse to have separate projects for dissect src and tests
apply from: '../../build.gradle'
@ -0,0 +1,57 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
/**
 * Parent class for all dissect related exceptions. Consumers may catch this exception or more specific child exceptions.
 */
public abstract class DissectException extends RuntimeException {

    /**
     * @param message description of the failure; forwarded to {@link RuntimeException} so that it is available
     *                through {@link #getMessage()}
     */
    DissectException(String message) {
        super(message);
    }

    /**
     * Error while parsing a dissect pattern
     */
    static class PatternParse extends DissectException {
        /**
         * @param pattern the dissect pattern that could not be parsed
         * @param reason  why the pattern was rejected
         */
        PatternParse(String pattern, String reason) {
            super("Unable to parse pattern: " + pattern + " Reason: " + reason);
        }
    }

    /**
     * Error while parsing a dissect key
     */
    static class KeyParse extends DissectException {
        /**
         * @param key    the key (the text between <code>%{</code> and <code>}</code>) that could not be parsed
         * @param reason why the key was rejected
         */
        KeyParse(String key, String reason) {
            super("Unable to parse key: " + key + " Reason: " + reason);
        }
    }

    /**
     * Unable to find a match between pattern and source string
     */
    static class FindMatch extends DissectException {
        /**
         * @param pattern the dissect pattern that was applied
         * @param source  the input string that did not match the pattern
         */
        FindMatch(String pattern, String source) {
            super("Unable to find match for dissect pattern: " + pattern + " against source: " + source);
        }
    }
}
@ -0,0 +1,191 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import java.util.EnumSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
* <p>A Key of a dissect pattern. This class models the name and modifiers and provides some validation.</p>
* <p>For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are:
* <ul>
* <li>{@code a}</li>
* <li>{@code +a}</li>
* <li>{@code b}</li>
* </ul>
* This class represents a single key.
* <p>A single key is composed of a name and its modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier.
* @see DissectParser
public final class DissectKey {
private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL);
private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL);
private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL);
private final Modifier modifier;
private boolean skip;
private boolean skipRightPadding;
private int appendPosition;
private String name;
* Constructor - parses the String key into it's name and modifier(s)
* @param key The key without the leading <code>%{</code> or trailing <code>}</code>, for example {@code a->}
DissectKey(String key) {
skip = key == null || key.isEmpty();
modifier = Modifier.findModifier(key);
switch (modifier) {
case NONE:
Matcher matcher = RIGHT_PADDING_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(1);
skipRightPadding = matcher.group(2) != null;
skip = name.isEmpty();
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
skip = true;
case APPEND:
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
matcher = APPEND_WITH_ORDER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(1);
appendPosition = Short.valueOf(matcher.group(3));
skipRightPadding = matcher.group(4) != null;
if (name == null || (name.isEmpty() && !skip)) {
throw new DissectException.KeyParse(key, "The key name could be determined");
* Copy constructor to explicitly override the modifier.
* @param key The key to copy (except for the modifier)
* @param modifier the modifer to use for this copy
DissectKey(DissectKey key, DissectKey.Modifier modifier){
this.modifier = modifier;
this.skipRightPadding = key.skipRightPadding;
this.skip = key.skip;
this.name = key.name;
this.appendPosition = key.appendPosition;
Modifier getModifier() {
return modifier;
boolean skip() {
return skip;
boolean skipRightPadding() {
return skipRightPadding;
int getAppendPosition() {
return appendPosition;
String getName() {
return name;
public String toString() {
return "DissectKey{" +
"modifier=" + modifier +
", skip=" + skip +
", appendPosition=" + appendPosition +
", name='" + name + '\'' +
public enum Modifier {
private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]");
private final String modifier;
public String toString() {
return modifier;
Modifier(final String modifier) {
this.modifier = modifier;
//package private for testing
static Modifier fromString(String modifier) {
return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier))
.findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen
private static Modifier findModifier(String key) {
Modifier modifier = Modifier.NONE;
if (key != null && !key.isEmpty()) {
Matcher matcher = MODIFIER_PATTERN.matcher(key);
int matches = 0;
while (matcher.find()) {
Modifier priorModifier = modifier;
modifier = Modifier.fromString(matcher.group());
if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) {
throw new DissectException.KeyParse(key, "multiple modifiers are not allowed.");
return modifier;
@ -0,0 +1,198 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
* Represents the matches of a {@link DissectParser#parse(String)}. Handles the appending and referencing based on the key instruction.
final class DissectMatch {
private final String appendSeparator;
private final Map<String, String> results;
private final Map<String, String> simpleResults;
private final Map<String, ReferenceResult> referenceResults;
private final Map<String, AppendResult> appendResults;
private int implicitAppendOrder = -1000;
private final int maxMatches;
private final int maxResults;
private final int appendCount;
private final int referenceCount;
private final int simpleCount;
private int matches = 0;
DissectMatch(String appendSeparator, int maxMatches, int maxResults, int appendCount, int referenceCount) {
if (maxMatches <= 0 || maxResults <= 0) {
throw new IllegalArgumentException("Expected results are zero, can not construct DissectMatch");//should never happen
this.maxMatches = maxMatches;
this.maxResults = maxResults;
this.appendCount = appendCount;
this.referenceCount = referenceCount;
this.appendSeparator = appendSeparator;
results = new HashMap<>(maxResults);
this.simpleCount = maxMatches - referenceCount - appendCount;
simpleResults = simpleCount <= 0 ? null : new HashMap<>(simpleCount);
referenceResults = referenceCount <= 0 ? null : new HashMap<>(referenceCount);
appendResults = appendCount <= 0 ? null : new HashMap<>(appendCount);
* Add the key/value that was found as result of the parsing
* @param key the {@link DissectKey}
* @param value the discovered value for the key
void add(DissectKey key, String value) {
if (key.skip()) {
switch (key.getModifier()) {
case NONE:
simpleResults.put(key.getName(), value);
case APPEND:
appendResults.computeIfAbsent(key.getName(), k -> new AppendResult(appendSeparator)).addValue(value, implicitAppendOrder++);
k -> new AppendResult(appendSeparator)).addValue(value, key.getAppendPosition());
referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setKey(value);
referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setValue(value);
boolean fullyMatched() {
return matches == maxMatches;
* Checks if results are valid.
* @param results the results to check
* @return true if all dissect keys have been matched and the results are of the expected size.
boolean isValid(Map<String, String> results) {
return fullyMatched() && results.size() == maxResults;
* Gets all the current matches. Pass the results of this to isValid to determine if a fully successful match has occured.
* @return the map of the results.
Map<String, String> getResults() {
if (simpleCount > 0) {
if (referenceCount > 0) {
referenceResults.forEach((k, v) -> results.put(v.getKey(), v.getValue()));
if (appendCount > 0) {
appendResults.forEach((k, v) -> results.put(k, v.getAppendResult()));
return results;
* a result that will need to be part of an append operation.
private final class AppendResult {
private final List<AppendValue> values = new ArrayList<>();
private final String appendSeparator;
private AppendResult(String appendSeparator) {
this.appendSeparator = appendSeparator;
private void addValue(String value, int order) {
values.add(new AppendValue(value, order));
private String getAppendResult() {
return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator));
* An appendable value that can be sorted based on the provided order
private final class AppendValue implements Comparable<AppendValue> {
private final String value;
private final int order;
private AppendValue(String value, int order) {
this.value = value;
this.order = order;
private String getValue() {
return value;
private int getOrder() {
return order;
public int compareTo(AppendValue o) {
return Integer.compare(this.order, o.getOrder());
* A result that needs to be converted to a key/value reference
private final class ReferenceResult {
private String key;
private String getKey() {
return key;
private String getValue() {
return value;
private String value;
private void setValue(String value) {
this.value = value;
private void setKey(String key) {
this.key = key;
@ -0,0 +1,310 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
* <p>Splits (dissects) a string into its parts based on a pattern.</p><p>A dissect pattern is composed of a set of keys and delimiters.
* For example the dissect pattern: <pre>%{a} %{b},%{c}</pre> has 3 keys (a,b,c) and two delimiters (space and comma). This pattern will
* match a string of the form: <pre>foo bar,baz</pre> and will result a key/value pairing of <pre>a=foo, b=bar, and c=baz.</pre>
* <p>Matches are all or nothing. For example, the same pattern will NOT match <pre>foo bar baz</pre> since all of the delimiters did not
* match. (the comma did not match)
* <p>Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior. For example the
* dissect pattern of <pre>%{a},%{b}:%{c}</pre> would not match <pre>foo,bar,baz</pre> since the colon never matches.
* <p>Modifiers appear to the left or the right of the key name. The supported modifiers are:
* <ul>
* <li>{@code ->} Instructs the parser to ignore repeating delimiters to the right of the key. Example: <pre>
* pattern: {@code %{a->} %{b} %{c}}
* string: {@code foo bar baz}
* result: {@code a=foo, b=bar, c=baz}
* </pre></li>
* <li>{@code +} Instructs the parser to append this key's value to the value of the prior key with the same name.
* Example: <pre>
* pattern: {@code %{a} %{+a} %{+a}}
* string: {@code foo bar baz}
* result: {@code a=foobarbaz}
* </pre></li>
* <li>{@code /} Instructs the parser to append this key's value to the value of a key based on the order specified after the
* {@code /}. Requires the {@code +} modifier to also be present in the key. Example: <pre>
* pattern: {@code %{a} %{+a/2} %{+a/1}}
* string: {@code foo bar baz}
* result: {@code a=foobazbar}
* </pre>
* </li>
* <li>{@code *} Instructs the parser to ignore the name of this key, instead use the value of key as the key name.
* Requires another key with the same name and the {@code &} modifier to be the value. Example: <pre>
* pattern: {@code %{*a} %{b} %{&a}}
* string: {@code foo bar baz}
* result: {@code foo=baz, b=bar}
* </pre></li>
* <li>{@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code *} modifier.
* Requires another key with the same name and the {@code *} modifier.
* Example: <pre>
* pattern: {@code %{*a} %{b} %{&a}}
* string: {@code foo bar baz}
* result: {@code foo=baz, b=bar}
* </pre></li>
* <li>{@code ?} Instructs the parser to ignore this key. The key name exists only for the purpose of human readability. Example
* <pre>
* pattern: {@code %{a} %{?skipme} %{c}}
* string: {@code foo bar baz}
* result: {@code a=foo, c=baz}
* </pre>
* </ul>
* <p>Empty key names patterns are also supported. They behave just like the {@code ?} modifier, except the name is not required.
* The result will simply be ignored. Example
* <pre>
* pattern: {@code %{a} %{} %{c}}
* string: {@code foo bar baz}
* result: {@code a=foo, c=baz}
* </pre>
* <p>
* Inspired by the Logstash Dissect Filter by Guy Boertje
public final class DissectParser {
private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%");
private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL);
private static final EnumSet<DissectKey.Modifier> ASSOCIATE_MODIFIERS = EnumSet.of(
private static final EnumSet<DissectKey.Modifier> APPEND_MODIFIERS = EnumSet.of(
private static final Function<DissectPair, String> KEY_NAME = val -> val.getKey().getName();
private final List<DissectPair> matchPairs;
private final String pattern;
private String leadingDelimiter = "";
private final int maxMatches;
private final int maxResults;
private final int appendCount;
private final int referenceCount;
private final String appendSeparator;
public DissectParser(String pattern, String appendSeparator) {
this.pattern = pattern;
this.appendSeparator = appendSeparator == null ? "" : appendSeparator;
Matcher matcher = LEADING_DELIMITER_PATTERN.matcher(pattern);
while (matcher.find()) {
leadingDelimiter = matcher.group(1);
List<DissectPair> matchPairs = new ArrayList<>();
matcher = KEY_DELIMITER_FIELD_PATTERN.matcher(pattern.substring(leadingDelimiter.length()));
while (matcher.find()) {
DissectKey key = new DissectKey(matcher.group(1));
String delimiter = matcher.group(2);
matchPairs.add(new DissectPair(key, delimiter));
this.maxMatches = matchPairs.size();
this.maxResults = Long.valueOf(matchPairs.stream()
.filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count()).intValue();
if (this.maxMatches == 0 || maxResults == 0) {
throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters.");
//append validation - look through all of the keys to see if there are any keys that need to participate in an append operation
// but don't have the '+' defined
Set<String> appendKeyNames = matchPairs.stream()
.filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier()))
if (appendKeyNames.size() > 0) {
List<DissectPair> modifiedMatchPairs = new ArrayList<>(matchPairs.size());
for (DissectPair p : matchPairs) {
if (p.getKey().getModifier().equals(DissectKey.Modifier.NONE) && appendKeyNames.contains(p.getKey().getName())) {
modifiedMatchPairs.add(new DissectPair(new DissectKey(p.getKey(), DissectKey.Modifier.APPEND), p.getDelimiter()));
} else {
matchPairs = modifiedMatchPairs;
appendCount = appendKeyNames.size();
//reference validation - ensure that '*' and '&' come in pairs
Map<String, List<DissectPair>> referenceGroupings = matchPairs.stream()
.filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier()))
for (Map.Entry<String, List<DissectPair>> entry : referenceGroupings.entrySet()) {
if (entry.getValue().size() != 2) {
throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '"
+ entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) +
"' Please ensure each '*<key>' is matched with a matching '&<key>");
referenceCount = referenceGroupings.size() * 2;
this.matchPairs = Collections.unmodifiableList(matchPairs);
* <p>Entry point to dissect a string into it's parts.</p>
* @param inputString The string to dissect
* @return the key/value Map of the results
* @throws DissectException if unable to dissect a pair into it's parts.
public Map<String, String> parse(String inputString) {
* This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against
* another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes
* of the other string matches. If they all match, record it and advances the primary cursor to the match point. If it can not match
* all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searching for
* (the delimiter) is generally small and rare the naive approach is efficient.
* In this case the the string that is walked is the input string, and the string being searched for is the current delimiter.
* For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the
* input string. At class construction the list of keys+delimiters are found (dissectPairs), which allows the use of that ordered
* list to know which delimiter to use for the search. The delimiters is progressed once the current delimiter is matched.
* There are two special cases that requires additional parsing beyond the standard naive algorithm. Consecutive delimiters should
* results in a empty matches unless the {@code ->} is provided. For example given the dissect pattern of
* {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d.
* However, if the key modifier {@code ->}, is present it will simply skip over any delimiters just to the right of the key
* without assigning any values. For example {@code %{a->},{%b}} will match the input string of {@code foo,,,,,,bar} with a=foo and
* b=bar.
DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount);
Iterator<DissectPair> it = matchPairs.iterator();
//ensure leading delimiter matches
if (inputString != null && inputString.length() > leadingDelimiter.length()
&& leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) {
byte[] input = inputString.getBytes(StandardCharsets.UTF_8);
//grab the first key/delimiter pair
DissectPair dissectPair = it.next();
DissectKey key = dissectPair.getKey();
byte[] delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
//start dissection after the first delimiter
int i = leadingDelimiter.length();
int valueStart = i;
int lookAheadMatches;
//start walking the input string byte by byte, look ahead for matches where needed
//if a match is found jump forward to the end of the match
for (; i < input.length; i++) {
lookAheadMatches = 0;
//potential match between delimiter and input string
if (delimiter.length > 0 && input[i] == delimiter[0]) {
//look ahead to see if the entire delimiter matches the input string
for (int j = 0; j < delimiter.length; j++) {
if (i + j < input.length && input[i + j] == delimiter[j]) {
//found a full delimiter match
if (lookAheadMatches == delimiter.length) {
//record the key/value tuple
byte[] value = Arrays.copyOfRange(input, valueStart, i);
dissectMatch.add(key, new String(value, StandardCharsets.UTF_8));
//jump to the end of the match
i += lookAheadMatches;
//look for consecutive delimiters (e.g. a,,,,d,e)
while (i < input.length) {
lookAheadMatches = 0;
for (int j = 0; j < delimiter.length; j++) {
if (i + j < input.length && input[i + j] == delimiter[j]) {
//found consecutive delimiters
if (lookAheadMatches == delimiter.length) {
//jump to the end of the match
i += lookAheadMatches;
if (!key.skipRightPadding()) {
//progress the keys/delimiter if possible
if (!it.hasNext()) {
break; //the while loop
dissectPair = it.next();
key = dissectPair.getKey();
//add the key with an empty value for the empty delimiter
dissectMatch.add(key, "");
} else {
break; //the while loop
//progress the keys/delimiter if possible
if (!it.hasNext()) {
break; //the for loop
dissectPair = it.next();
key = dissectPair.getKey();
delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
//i is always one byte after the last found delimiter, aka the start of the next value
valueStart = i;
//the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key)
//and there is no trailing delimiter
if (!dissectMatch.fullyMatched() && delimiter.length == 0 ) {
byte[] value = Arrays.copyOfRange(input, valueStart, input.length);
String valueString = new String(value, StandardCharsets.UTF_8);
dissectMatch.add(key, valueString);
Map<String, String> results = dissectMatch.getResults();
if (!dissectMatch.isValid(results)) {
throw new DissectException.FindMatch(pattern, inputString);
return results;
* A tuple class to hold the dissect key and delimiter
private class DissectPair {
private final DissectKey key;
private final String delimiter;
private DissectPair(DissectKey key, String delimiter) {
this.key = key;
this.delimiter = delimiter;
private DissectKey getKey() {
return key;
private String getDelimiter() {
return delimiter;
Normal file
Normal file
@ -0,0 +1,7 @@
// this is just a shell gradle file for eclipse to have separate projects for dissect src and tests
apply from: '../../build.gradle'
dependencies {
testCompile project(':libs:dissect')
@ -0,0 +1,178 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.CoreMatchers;
import java.util.EnumSet;
import java.util.List;
import java.util.stream.Collectors;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
public class DissectKeyTests extends ESTestCase {
public void testNoModifier() {
String keyName = randomAlphaOfLengthBetween(1, 10);
DissectKey dissectKey = new DissectKey(keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testAppendModifier() {
String keyName = randomAlphaOfLengthBetween(1, 10);
DissectKey dissectKey = new DissectKey("+" + keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testAppendWithOrderModifier() {
String keyName = randomAlphaOfLengthBetween(1, 10);
int length = randomIntBetween(1, 100);
DissectKey dissectKey = new DissectKey("+" + keyName + "/" + length);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(length));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testAppendWithOrderModifierNoName() {
int length = randomIntBetween(1, 100);
DissectException e = expectThrows(DissectException.class, () -> new DissectKey("+/" + length));
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
public void testOrderModifierWithoutAppend() {
String keyName = randomAlphaOfLengthBetween(1, 10);
int length = randomIntBetween(1, 100);
DissectException e = expectThrows(DissectException.class, () -> new DissectKey(keyName + "/" + length));
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
public void testFieldNameModifier() {
String keyName = randomAlphaOfLengthBetween(1, 10);
DissectKey dissectKey = new DissectKey("*" + keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testFieldValueModifiers() {
String keyName = randomAlphaOfLengthBetween(1, 10);
DissectKey dissectKey = new DissectKey("&" + keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testRightPaddingModifiers() {
String keyName = randomAlphaOfLengthBetween(1, 10);
DissectKey dissectKey = new DissectKey(keyName + "->");
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
assertThat(dissectKey.skip(), is(false));
assertThat(dissectKey.skipRightPadding(), is(true));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
dissectKey = new DissectKey("*" + keyName + "->");
assertThat(dissectKey.skipRightPadding(), is(true));
dissectKey = new DissectKey("&" + keyName + "->");
assertThat(dissectKey.skipRightPadding(), is(true));
dissectKey = new DissectKey("+" + keyName + "->");
assertThat(dissectKey.skipRightPadding(), is(true));
dissectKey = new DissectKey("?" + keyName + "->");
assertThat(dissectKey.skipRightPadding(), is(true));
dissectKey = new DissectKey("+" + keyName + "/2->");
assertThat(dissectKey.skipRightPadding(), is(true));
public void testMultipleLeftModifiers() {
String keyName = randomAlphaOfLengthBetween(1, 10);
List<String> validModifiers = EnumSet.allOf(DissectKey.Modifier.class).stream()
.filter(m -> !m.equals(DissectKey.Modifier.NONE))
String modifier1 = randomFrom(validModifiers);
String modifier2 = randomFrom(validModifiers);
DissectException e = expectThrows(DissectException.class, () -> new DissectKey(modifier1 + modifier2 + keyName));
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
public void testSkipKey() {
String keyName = "";
DissectKey dissectKey = new DissectKey(keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
assertThat(dissectKey.skip(), is(true));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testNamedSkipKey() {
String keyName = "myname";
DissectKey dissectKey = new DissectKey("?" +keyName);
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
assertThat(dissectKey.skip(), is(true));
assertThat(dissectKey.skipRightPadding(), is(false));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testSkipKeyWithPadding() {
String keyName = "";
DissectKey dissectKey = new DissectKey(keyName + "->");
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
assertThat(dissectKey.skip(), is(true));
assertThat(dissectKey.skipRightPadding(), is(true));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testNamedEmptySkipKeyWithPadding() {
String keyName = "";
DissectKey dissectKey = new DissectKey("?" +keyName + "->");
assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
assertThat(dissectKey.skip(), is(true));
assertThat(dissectKey.skipRightPadding(), is(true));
assertThat(dissectKey.getAppendPosition(), equalTo(0));
assertThat(dissectKey.getName(), equalTo(keyName));
public void testInvalidModifiers() {
//should never happen due to regex
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> DissectKey.Modifier.fromString("x"));
assertThat(e.getMessage(), CoreMatchers.containsString("invalid modifier"));
@ -0,0 +1,93 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.test.ESTestCase;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.stream.IntStream;
import static org.hamcrest.Matchers.equalTo;
public class DissectMatchTests extends ESTestCase {
public void testIllegalArgs() {
expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 0, 1, 0, 0));
expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 1, 0, 0, 0));
public void testValidAndFullyMatched() {
int expectedMatches = randomIntBetween(1, 26);
DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
IntStream.range(97, 97 + expectedMatches) //allow for a-z values
.forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
assertThat(dissectMatch.fullyMatched(), equalTo(true));
assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(true));
public void testNotValidAndFullyMatched() {
int expectedMatches = randomIntBetween(1, 26);
DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
IntStream.range(97, 97 + expectedMatches - 1) //allow for a-z values
.forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
assertThat(dissectMatch.fullyMatched(), equalTo(false));
assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(false));
public void testGetResultsIdempotent(){
int expectedMatches = randomIntBetween(1, 26);
DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
IntStream.range(97, 97 + expectedMatches) //allow for a-z values
.forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
assertThat(dissectMatch.getResults(), equalTo(dissectMatch.getResults()));
public void testAppend(){
DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0);
dissectMatch.add(new DissectKey("+a"), "x");
dissectMatch.add(new DissectKey("+a"), "y");
dissectMatch.add(new DissectKey("+a"), "z");
Map<String, String> results = dissectMatch.getResults();
assertThat(dissectMatch.isValid(results), equalTo(true));
assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "x-y-z").map()));
public void testAppendWithOrder(){
DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0);
dissectMatch.add(new DissectKey("+a/3"), "x");
dissectMatch.add(new DissectKey("+a"), "y");
dissectMatch.add(new DissectKey("+a/1"), "z");
Map<String, String> results = dissectMatch.getResults();
assertThat(dissectMatch.isValid(results), equalTo(true));
assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "y-z-x").map()));
public void testReference(){
DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1);
dissectMatch.add(new DissectKey("&a"), "x");
dissectMatch.add(new DissectKey("*a"), "y");
Map<String, String> results = dissectMatch.getResults();
assertThat(dissectMatch.isValid(results), equalTo(true));
assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map()));
@ -0,0 +1,386 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.dissect;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.CoreMatchers;
import org.hamcrest.Matchers;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween;
public class DissectParserTests extends ESTestCase {
public void testJavaDocExamples() {
assertMatch("%{a} %{b},%{c}", "foo bar,baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
assertMiss("%{a},%{b}:%{c}", "foo,bar,baz");
assertMatch("%{a->} %{b} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar"));
assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
assertMatch("%{a} %{?skipme} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
* Borrowed from Logstash's test cases:
* https://github.com/logstash-plugins/logstash-filter-dissect/blob/master/src/test/java/org/logstash/dissect/DissectorTest.java
* Append Note - Logstash appends with the delimiter as the separator between values; this implementation uses a user-defined separator
// Runs a series of pattern/input pairs through assertMatch (expected keys and values, optionally
// with an append separator as the last argument) and assertMiss (input must not match).
public void testLogstashSpecs() {
assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
// a null input is expected to be reported as a miss
assertMiss("%{a}%{b} %{c}", null);
assertMiss("%{a} %{b}%{c} %{d}", "foo bar baz");
assertMiss("%{a} %{b} %{c}%{d}", "foo bar baz quux");
assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
// the trailing " " argument is the separator placed between appended values
assertMatch("%{a} %{b} %{+b} %{z}", "foo bar baz quux", Arrays.asList("a", "b", "z"), Arrays.asList("foo", "bar baz", "quux"), " ");
assertMatch("%{a}------->%{b}", "foo------->bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo", "bar baz quux"));
assertMatch("%{a}------->%{}", "foo------->bar baz quux", Arrays.asList("a"), Arrays.asList("foo"));
// non-ASCII delimiters
assertMatch("%{a} » %{b}»%{c}€%{d}", "foo » bar»baz€quux",
Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "bar", "baz", "quux"));
assertMatch("%{a} %{b} %{+a}", "foo bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo baz quux", "bar"), " ");
//Logstash supports implicit ordering anchored by the key without the '+'
//This implementation will only honor implicit ordering for appending right to left else explicit order (/N) is required.
//The results of this test differ from Logstash.
assertMatch("%{+a} %{a} %{+a} %{b}", "December 31 1999 quux",
Arrays.asList("a", "b"), Arrays.asList("December 31 1999", "quux"), " ");
//Same test as above, but with same result as Logstash using explicit ordering in the pattern
assertMatch("%{+a/1} %{a} %{+a/2} %{b}", "December 31 1999 quux",
Arrays.asList("a", "b"), Arrays.asList("31 December 1999", "quux"), " ");
assertMatch("%{+a/2} %{+a/4} %{+a/1} %{+a/3}", "bar quux foo baz", Arrays.asList("a"), Arrays.asList("foo bar baz quux"), " ");
assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux",
Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " ");
// NOTE(review): in this copy of the file the input string of the next call ends with a dangling '+'
// and the expected src_ip/dst_ip values are empty - the continuation of the input literal appears
// to be missing; confirm against the upstream test before relying on it.
assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " +
"%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}",
"42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied> None " +
Arrays.asList("syslog_timestamp", "hostname", "rt", "reason", "src_ip", "src_port", "dst_ip", "dst_port", "polrt"
, "from_zone", "to_zone", "rest"),
Arrays.asList("2016-05-25T14:47:23Z", "host.name.com", "RT_FLOW - RT_FLOW_SESSION_DENY", "session denied", "", "60000"
, "", "8090", "None 6(0) DEFAULT-DENY", "ZONE-UNTRUST", "ZONE-DMZ", "UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0"), " ");
assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz",
Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
// skip keys (unnamed and named) combined with right padding
assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
assertMatch("%{?skipme->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
// consecutive delimiters without right padding produce empty values
assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666",
Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666"));
assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
assertMatch("%{a}", "子", Arrays.asList("a"), Arrays.asList("子"));
// delimiter made of braces around a newline
assertMatch("%{a}{\n}%{b}", "aaa{\n}bbb", Arrays.asList("a", "b"), Arrays.asList("aaa", "bbb"));
assertMiss("MACHINE[%{a}] %{b}", "1234567890 MACHINE[foo] bar");
assertMiss("%{a} %{b} %{c}", "foo:bar:baz");
assertMatch("/var/%{key1}/log/%{key2}.log", "/var/foo/log/bar.log", Arrays.asList("key1", "key2"), Arrays.asList("foo", "bar"));
assertMatch("%{a->} %{b}-.-%{c}-%{d}-..-%{e}-%{f}-%{g}-%{h}", "foo bar-.-baz-1111-..-22-333-4444-55555",
Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h"),
Arrays.asList("foo", "bar", "baz", "1111", "22", "333", "4444", "55555"));
public void testBasicMatch() {
String valueFirstInput = "";
String keyFirstPattern = "";
String delimiterFirstInput = "";
String delimiterFirstPattern = "";
//parallel arrays
List<String> expectedKeys = Arrays.asList(generateRandomStringArray(100, 10, false, false));
List<String> expectedValues = new ArrayList<>(expectedKeys.size());
for (String key : expectedKeys) {
String value = randomAsciiAlphanumOfLengthBetween(1, 100);
String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail
keyFirstPattern += "%{" + key + "}" + delimiter;
valueFirstInput += value + delimiter;
delimiterFirstPattern += delimiter + "%{" + key + "}";
delimiterFirstInput += delimiter + value;
assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
public void testBasicMatchUnicode() {
String valueFirstInput = "";
String keyFirstPattern = "";
String delimiterFirstInput = "";
String delimiterFirstPattern = "";
//parallel arrays
List<String> expectedKeys = new ArrayList<>();
List<String> expectedValues = new ArrayList<>();
for (int i = 0; i < randomIntBetween(1, 100); i++) {
String key = randomAsciiAlphanumOfLengthBetween(1, 100);
String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail
keyFirstPattern += "%{" + key + "}" + delimiter;
valueFirstInput += value + delimiter;
delimiterFirstPattern += delimiter + "%{" + key + "}";
delimiterFirstInput += delimiter + value;
assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
public void testMatchUnicode() {
assertMatch("%{a} %{b}", "foo 子", Arrays.asList("a", "b"), Arrays.asList("foo", "子"));
assertMatch("%{a}࿏%{b} %{c}", "⟳༒࿏༒⟲ 子", Arrays.asList("a", "b", "c"), Arrays.asList("⟳༒", "༒⟲", "子"));
assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子"));
assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲"));
assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲"));
assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲"));
public void testMatchRemainder() {
assertMatch("%{a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"));
assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
assertMatch("%{} %{b}", "foo bar the rest", Arrays.asList("b"), Arrays.asList("bar the rest"));
assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
assertMatch("%{*a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest"));
assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " ");
public void testAppend() {
assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol"));
assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " ");
public void testAssociate() {
assertMatch("%{*a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar"));
assertMatch("%{&a} %{*a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo"));
assertMatch("%{*a} %{&a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol"));
assertMatch("%{*a} %{&a} %{c} %{*b} %{&b}", "foo bar x baz lol",
Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x"));
assertBadPattern("%{*a} %{a}");
assertBadPattern("%{a} %{&a}");
assertMiss("%{*a} %{&a} {a} %{*b} %{&b}", "foo bar x baz lol");
public void testAppendAndAssociate() {
assertMatch("%{a} %{+a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol"));
assertMatch("%{a->} %{+a/2} %{+a/1} %{*b} %{&b}", "foo bar baz lol x",
Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x"));
public void testEmptyKey() {
assertMatch("%{} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{a} %{}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{a} %{->}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo"));
public void testNamedSkipKey() {
assertMatch("%{?foo} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{?} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{a} %{?bar}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{?foo->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{?->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{?foo->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar"));
assertMatch("%{a} %{->?bar}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{a} %{?skipme} %{?skipme}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{a} %{?} %{?}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo"));
public void testConsecutiveDelimiters() {
assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{a->},%{b}", ",,,,,foo", Arrays.asList("a", "b"), Arrays.asList("", "foo"));
assertMatch("%{a->},", "foo,,,,,", Arrays.asList("a"), Arrays.asList("foo"));
assertMatch("%{a} %{b},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a} %{b->},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a->},%{b}", "foo,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a->}x%{b}", "fooxxxxxbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a->} xyz%{b}", "foo xyz xyz xyz xyz xyzbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
//skipped with empty values
assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
assertMatch("%{a},%{b},%{c},%{d}", "foo,,bar,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "bar", "baz"));
assertMatch("%{a},%{b},%{c},%{d}", "foo,,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "baz"));
assertMatch("%{a},%{b},%{c},%{d}", ",bar,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("", "bar", "", "baz"));
assertMatch("%{->},%{a->},%{b}", ",,,bar,,baz", Arrays.asList("a", "b"), Arrays.asList("bar", "baz"));
public void testAppendWithConsecutiveDelimiters() {
assertMatch("%{+a/1},%{+a/3}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", ""));
assertMatch("%{+a/1},%{+a/3->}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobazbar", "lol"));
public void testSkipRightPadding() {
assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{->} %{a}", "foo bar", Arrays.asList("a"), Arrays.asList("bar"));
assertMatch("%{a->} %{+a->} %{*b->} %{&b->} %{c}", "foo bar baz lol x",
Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x"));
public void testTrimmedEnd() {
assertMatch("%{a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch("%{a} %{b->} ", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
//only whitespace is trimmed in the absence of trailing characters
assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,"));
//consecutive delimiters + right padding can be used to skip over the trailing delimiters
assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
public void testLeadingDelimiter() {
assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", "bar"));
* Runtime errors
public void testMiss() {
assertMiss("%{a}%{b}", "foo");
assertMiss("%{a},%{b}", "foo bar");
assertMiss("%{a}, %{b}", "foo,bar");
assertMiss("x%{a},%{b}", "foo,bar");
assertMiss("x%{},%{b}", "foo,bar");
assertMiss("leading_delimiter_long%{a}", "foo");
assertMiss("%{a}trailing_delimiter_long", "foo");
assertMiss("leading_delimiter_long%{a}trailing_delimiter_long", "foo");
assertMiss("%{a}x", "foo");
assertMiss("%{a},%{b}x", "foo,bar");
* Construction errors
public void testBadPatternOrKey() {
assertBadPattern("%{*a} %{&b}");
public void testSyslog() {
assertMatch("%{timestamp} %{+timestamp} %{+timestamp} %{logsource} %{program}[%{pid}]: %{message}",
"Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[]",
Arrays.asList("timestamp", "logsource", "program", "pid", "message"),
Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[]"), " ");
// Dissects an Apache access-log style line into its fields; the pattern's trailing %{->} is an
// empty key, so the final quoted token of the input has no entry in the expected keys.
// NOTE(review): the input starts with a space and the expected clientip is "" - presumably the
// client address was removed from this copy; confirm against the upstream fixture.
public void testApacheLog() {
assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" +
" \"%{referrer}\" \"%{agent}\" %{->}",
" - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"",
// expected keys, in the order they appear in the pattern
Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes",
"referrer", "agent"),
// expected values, parallel to the keys above
Arrays.asList("", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849",
"http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36" +
" (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36"));
* Shared specification between Beats, Logstash, and Ingest node
public void testJsonSpecification() throws Exception {
ObjectMapper mapper = new ObjectMapper();
JsonNode rootNode = mapper.readTree(this.getClass().getResourceAsStream("/specification/tests.json"));
Iterator<JsonNode> tests = rootNode.elements();
while (tests.hasNext()) {
JsonNode test = tests.next();
boolean skip = test.path("skip").asBoolean();
if (!skip) {
String name = test.path("name").asText();
logger.debug("Running Json specification: " + name);
String pattern = test.path("tok").asText();
String input = test.path("msg").asText();
String append = test.path("append").asText();
boolean fail = test.path("fail").asBoolean();
Iterator<Map.Entry<String, JsonNode>> expected = test.path("expected").fields();
List<String> expectedKeys = new ArrayList<>();
List<String> expectedValues = new ArrayList<>();
expected.forEachRemaining(entry -> {
if (fail) {
assertFail(pattern, input);
} else {
assertMatch(pattern, input, expectedKeys, expectedValues, append);
private DissectException assertFail(String pattern, String input){
return expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input));
private void assertMiss(String pattern, String input) {
DissectException e = assertFail(pattern, input);
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern"));
assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
assertThat(e.getMessage(), input == null ? CoreMatchers.containsString("null") : CoreMatchers.containsString(input));
private void assertBadPattern(String pattern) {
DissectException e = assertFail(pattern, null);
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern"));
assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
private void assertBadKey(String pattern, String key) {
DissectException e = assertFail(pattern, null);
assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
assertThat(e.getMessage(), CoreMatchers.containsString(key));
private void assertBadKey(String pattern) {
assertBadKey(pattern, pattern.replace("%{", "").replace("}", ""));
private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues) {
assertMatch(pattern, input, expectedKeys, expectedValues, null);
private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues, String appendSeperator) {
Map<String, String> results = new DissectParser(pattern, appendSeperator).parse(input);
List<String> foundKeys = new ArrayList<>(results.keySet());
List<String> foundValues = new ArrayList<>(results.values());
assertThat(foundKeys, Matchers.equalTo(expectedKeys));
assertThat(foundValues, Matchers.equalTo(expectedValues));
Normal file
Normal file
@ -0,0 +1,363 @@
"name": "When all the defined fields are captured but we have remaining data",
"tok": "level=%{level} ts=%{timestamp} caller=%{caller} msg=\"%{message}\"",
"msg": "level=info ts=2018-06-27T17:19:13.036579993Z caller=main.go:222 msg=\"Starting OK\" version=\"(version=2.3.1, branch=HEAD, revision=188ca45bd85ce843071e768d855722a9d9dabe03)\"}",
"expected": {
"caller": "main.go:222",
"level": "info",
"message": "Starting OK",
"timestamp": "2018-06-27T17:19:13.036579993Z"
"skip": false,
"fail": false,
"append": ""
"name": "Complex stack trace",
"tok": "%{day}-%{month}-%{year} %{hour} %{severity} [%{thread_id}] %{origin} %{message}",
"msg": "18-Apr-2018 06:53:20.411 INFO [http-nio-8080-exec-1] org.apache.coyote.http11.Http11Processor.service Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)",
"expected": {
"day": "18",
"hour": "06:53:20.411",
"message": "Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)",
"month": "Apr",
"origin": "org.apache.coyote.http11.Http11Processor.service",
"severity": "INFO",
"thread_id": "http-nio-8080-exec-1",
"year": "2018"
"skip": false,
"fail": false,
"append": ""
"name": "success when delimiter found at the beginning and end of the string",
"tok": "/var/log/%{key}.log",
"msg": "/var/log/foobar.log",
"expected": {
"key": "foobar"
"skip": false,
"fail": false,
"append": ""
"name": "fails when delimiter is not found at the beginning of the string",
"tok": "/var/log/%{key}.log",
"msg": "foobar",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "fails when delimiter is not found after the key",
"tok": "/var/log/%{key}.log",
"msg": "/var/log/foobar",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "simple dissect",
"tok": "%{key}",
"msg": "foobar",
"expected": {
"key": "foobar"
"skip": false,
"fail": false,
"append": ""
"name": "dissect two replacement",
"tok": "%{key1} %{key2}",
"msg": "foo bar",
"expected": {
"key1": "foo",
"key2": "bar"
"skip": false,
"fail": false,
"append": ""
"name": "fail on partial match",
"tok": "%{key1} %{key2} %{key3}",
"msg": "foo bar",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "one level dissect not end of string",
"tok": "/var/%{key}/log",
"msg": "/var/foobar/log",
"expected": {
"key": "foobar"
"skip": false,
"fail": false,
"append": ""
"name": "one level dissect",
"tok": "/var/%{key}",
"msg": "/var/foobar/log",
"expected": {
"key": "foobar/log"
"skip": false,
"fail": false,
"append": ""
"name": "multiple keys dissect end of string",
"tok": "/var/%{key}/log/%{key1}",
"msg": "/var/foobar/log/apache",
"expected": {
"key": "foobar",
"key1": "apache"
"skip": false,
"fail": false,
"append": ""
"name": "multiple keys not end of string",
"tok": "/var/%{key}/log/%{key1}.log",
"msg": "/var/foobar/log/apache.log",
"expected": {
"key": "foobar",
"key1": "apache"
"skip": false,
"fail": false,
"append": ""
"name": "append with order",
"tok": "%{+key/3} %{+key/1} %{+key/2}",
"msg": "1 2 3",
"expected": {
"key": "231"
"skip": false,
"fail": false,
"append": ""
"name": "append with order and separator",
"tok": "%{+key/3} %{+key/1} %{+key/2}",
"msg": "1 2 3",
"expected": {
"key": "2::3::1"
"skip": false,
"fail": false,
"append": "::"
"name": "append with order and right padding",
"tok": "%{+key/3} %{+key/1-\u003e} %{+key/2}",
"msg": "1 2 3",
"expected": {
"key": "231"
"skip": false,
"fail": false,
"append": ""
"name": "simple append",
"tok": "%{key}-%{+key}-%{+key}",
"msg": "1-2-3",
"expected": {
"key": "123"
"skip": false,
"fail": false,
"append": ""
"name": "simple append with separator",
"tok": "%{key}-%{+key}-%{+key}",
"msg": "1-2-3",
"expected": {
"key": "1,2,3"
"skip": false,
"fail": false,
"append": ","
"name": "reference field",
"tok": "%{*key} %{\u0026key}",
"msg": "hello world",
"expected": {
"hello": "world"
"skip": false,
"fail": false,
"append": ""
"name": "reference field alt order",
"tok": "%{\u0026key} %{*key}",
"msg": "hello world",
"expected": {
"world": "hello"
"skip": false,
"fail": false,
"append": ""
"name": "nameless skip field",
"tok": "%{} %{key}",
"msg": "hello world",
"expected": {
"key": "world"
"skip": false,
"fail": false,
"append": ""
"name": "named skip field",
"tok": "%{?skipme} %{key}",
"msg": "hello world",
"expected": {
"key": "world"
"skip": false,
"fail": false,
"append": ""
"name": "reference without pairing",
"tok": "%{key} %{\u0026key}",
"msg": "hello world",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "missing fields (consecutive delimiters)",
"tok": "%{name},%{addr1},%{addr2},%{addr3},%{city},%{zip}",
"msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
"expected": {
"addr1": "4321 Fifth Avenue",
"addr2": "",
"addr3": "",
"city": "New York",
"name": "Jane Doe",
"zip": "87432"
"skip": false,
"fail": false,
"append": ""
"name": "missing fields with right padding (consecutive delimiters)",
"tok": "%{name},%{addr1-\u003e},%{city},%{zip}",
"msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
"expected": {
"addr1": "4321 Fifth Avenue",
"city": "New York",
"name": "Jane Doe",
"zip": "87432"
"skip": false,
"fail": false,
"append": ""
"name": "ignore right padding",
"tok": "%{id} %{function-\u003e} %{server}",
"msg": "00000043 ViewReceive machine-321",
"expected": {
"function": "ViewReceive",
"id": "00000043",
"server": "machine-321"
"skip": false,
"fail": false,
"append": ""
"name": "padding on the last key need a delimiter",
"tok": "%{id} %{function} %{server-\u003e} ",
"msg": "00000043 ViewReceive machine-321 ",
"expected": {
"function": "ViewReceive",
"id": "00000043",
"server": "machine-321"
"skip": false,
"fail": false,
"append": ""
"name": "ignore left padding",
"tok": "%{id-\u003e} %{function} %{server}",
"msg": "00000043 ViewReceive machine-321",
"expected": {
"function": "ViewReceive",
"id": "00000043",
"server": "machine-321"
"skip": false,
"fail": false,
"append": ""
"name": "when the delimiters contains `{` and `}`",
"tok": "{%{a}}{%{b}} %{rest}",
"msg": "{c}{d} anything",
"expected": {
"a": "c",
"b": "d",
"rest": "anything"
"skip": false,
"fail": false,
"append": ""
"name": "no keys defined",
"tok": "anything",
"msg": "anything",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "invalid key",
"tok": "%{some?thing}",
"msg": "anything",
"expected": null,
"skip": false,
"fail": true,
"append": ""
"name": "matches non-ascii",
"tok": "%{a}࿏%{b} %{c}",
"msg": "⟳༒࿏༒⟲ 子",
"expected": {
"a": "⟳༒",
"b": "༒⟲",
"c": "子"
"skip": false,
"fail": false,
"append": ""
Reference in New Issue
Block a user