EQL: Add wildcard function (#54020)
* EQL: Add wildcard function * EQL: Cleanup Wildcard.getArguments * EQL: Cleanup Wildcard and rearrange methods * EQL: Wildcard newline lint * EQL: Make StringUtils function final * EQL: Make Wildcard.asLikes return ScalarFunction * QL: Restore BinaryLogic.java * EQL: Add Wildcard PR feedback * EQL: Add Wildcard verification tests * EQL: Switch wildcard to isFoldable test * EQL: Change wildcard test to numeric field * EQL: Remove Wildcard.get_arguments
This commit is contained in:
parent
83e900e248
commit
022f829d84
|
@ -209,7 +209,14 @@ registry where length(bytes_written_string_list) == 2 and bytes_written_string_l
|
|||
|
||||
[[queries]]
|
||||
query = '''
|
||||
registry where key_path == "*\\MACHINE\\SAM\\SAM\\*\\Account\\Us*ers\\00*03E9\\F"'''
|
||||
registry where key_path == "*\\MACHINE\\SAM\\SAM\\*\\Account\\Us*ers\\00*03E9\\F"
|
||||
'''
|
||||
expected_event_ids = [79]
|
||||
|
||||
[[queries]]
|
||||
query = '''
|
||||
registry where wildcard(key_path, "*\\MACHINE\\SAM\\SAM\\*\\Account\\Us*ers\\00*03E9\\F")
|
||||
'''
|
||||
expected_event_ids = [79]
|
||||
|
||||
[[queries]]
|
||||
|
|
|
@ -10,6 +10,7 @@ import org.elasticsearch.xpack.eql.expression.function.scalar.string.EndsWith;
|
|||
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Length;
|
||||
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StartsWith;
|
||||
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Substring;
|
||||
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Wildcard;
|
||||
import org.elasticsearch.xpack.ql.expression.function.FunctionDefinition;
|
||||
import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry;
|
||||
|
||||
|
@ -20,7 +21,7 @@ public class EqlFunctionRegistry extends FunctionRegistry {
|
|||
public EqlFunctionRegistry() {
|
||||
super(functions());
|
||||
}
|
||||
|
||||
|
||||
private static FunctionDefinition[][] functions() {
|
||||
return new FunctionDefinition[][] {
|
||||
// Scalar functions
|
||||
|
@ -29,7 +30,8 @@ public class EqlFunctionRegistry extends FunctionRegistry {
|
|||
def(EndsWith.class, EndsWith::new, "endswith"),
|
||||
def(Length.class, Length::new, "length"),
|
||||
def(StartsWith.class, StartsWith::new, "startswith"),
|
||||
def(Substring.class, Substring::new, "substring")
|
||||
def(Substring.class, Substring::new, "substring"),
|
||||
def(Wildcard.class, Wildcard::new, "wildcard"),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.eql.expression.function.scalar.string;
|
||||
|
||||
import org.elasticsearch.xpack.eql.EqlIllegalArgumentException;
|
||||
import org.elasticsearch.xpack.eql.util.StringUtils;
|
||||
import org.elasticsearch.xpack.ql.expression.Expression;
|
||||
import org.elasticsearch.xpack.ql.expression.Expressions;
|
||||
import org.elasticsearch.xpack.ql.expression.Expressions.ParamOrdinal;
|
||||
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
|
||||
import org.elasticsearch.xpack.ql.expression.gen.pipeline.Pipe;
|
||||
import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.logical.Or;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.regex.Like;
|
||||
import org.elasticsearch.xpack.ql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.ql.tree.Source;
|
||||
import org.elasticsearch.xpack.ql.type.DataType;
|
||||
import org.elasticsearch.xpack.ql.type.DataTypes;
|
||||
import org.elasticsearch.xpack.ql.util.CollectionUtils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isFoldable;
|
||||
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isString;
|
||||
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isStringAndExact;
|
||||
|
||||
/**
|
||||
* EQL wildcard function. Matches the form:
|
||||
* wildcard(field, "*wildcard*pattern*", ...)
|
||||
*/
|
||||
public class Wildcard extends ScalarFunction {
|
||||
|
||||
private final Expression field;
|
||||
private final List<Expression> patterns;
|
||||
|
||||
/**
* Creates a wildcard function node.
* The field and the patterns are combined (field first) into the single child list handed to the superclass,
* so child 0 is always the field and children 1..n are the patterns.
*
* @param source   passed through to the superclass constructor
* @param field    expression whose value is matched against the patterns
* @param patterns wildcard pattern expressions; {@link #asLikes()} ORs one LIKE per pattern
*/
public Wildcard(Source source, Expression field, List<Expression> patterns) {
|
||||
super(source, CollectionUtils.combine(Collections.singletonList(field), patterns));
|
||||
this.field = field;
|
||||
this.patterns = patterns;
|
||||
}
|
||||
|
||||
/**
* Describes this node as the {@code Wildcard} constructor together with its two properties, field and patterns.
*/
@Override
|
||||
protected NodeInfo<? extends Expression> info() {
|
||||
return NodeInfo.create(this, Wildcard::new, field, patterns);
|
||||
}
|
||||
|
||||
/**
* Rebuilds the function from a new child list, using the same layout the constructor produces:
* the first child becomes the field, all remaining children become the patterns.
*
* @param newChildren replacement children; must contain the field plus at least one pattern
* @return a new {@code Wildcard} with this node's source
* @throws IllegalArgumentException if fewer than two children are supplied
*/
@Override
|
||||
public Expression replaceChildren(List<Expression> newChildren) {
|
||||
if (newChildren.size() < 2) {
|
||||
throw new IllegalArgumentException("expected at least [2] children but received [" + newChildren.size() + "]");
|
||||
}
|
||||

||||
return new Wildcard(source(), newChildren.get(0), newChildren.subList(1, newChildren.size()));
|
||||
}
|
||||
|
||||
/**
* The function always produces a boolean (match / no match).
*/
@Override
|
||||
public DataType dataType() {
|
||||
return DataTypes.BOOLEAN;
|
||||
}
|
||||
|
||||
/**
* Validates the argument types.
* The field must pass {@code isStringAndExact}; each pattern must pass {@code isFoldable} and then {@code isString}.
* Checking stops at the first failing check and that failing resolution is returned;
* otherwise the resolution of the last successful check is returned.
*/
@Override
|
||||
protected TypeResolution resolveType() {
|
||||
if (childrenResolved() == false) {
|
||||
return new TypeResolution("Unresolved children");
|
||||
}
|
||||

||||
TypeResolution lastResolution = isStringAndExact(field, sourceText(), ParamOrdinal.FIRST);
|
||||
if (lastResolution.unresolved()) {
|
||||
return lastResolution;
|
||||
}
|
||||

||||
// patterns start at argument index 1 (index 0 is the field); used only to pick the ordinal for error reporting
int index = 1;
|
||||

||||
for (Expression p: patterns) {
|
||||

||||
lastResolution = isFoldable(p, sourceText(), ParamOrdinal.fromIndex(index));
|
||||
if (lastResolution.unresolved()) {
|
||||
break;
|
||||
}
|
||||

||||
lastResolution = isString(p, sourceText(), ParamOrdinal.fromIndex(index));
|
||||
if (lastResolution.unresolved()) {
|
||||
break;
|
||||
}
|
||||

||||
index++;
|
||||
}
|
||||

||||
return lastResolution;
|
||||
}
|
||||
|
||||
/**
* Foldable only when every child (field and patterns) is foldable and the equivalent
* LIKE/OR expression built by {@link #asLikes()} is foldable as well.
* The children check comes first, so {@code asLikes()} (which folds each pattern) is only
* invoked once all children are known to be foldable.
*/
@Override
|
||||
public boolean foldable() {
|
||||
return Expressions.foldable(children()) && asLikes().foldable();
|
||||
}
|
||||
|
||||
/**
* Folds by delegating to the equivalent LIKE/OR expression built by {@link #asLikes()}.
*/
@Override
|
||||
public Object fold() {
|
||||
return asLikes().fold();
|
||||
}
|
||||
|
||||
/**
* Not supported on this node: always throws.
* NOTE(review): presumably the node is expected to have been rewritten via {@link #asLikes()}
* (the optimizer's ReplaceWildcardFunction rule does this for filters) before pipes are built - confirm.
*
* @throws EqlIllegalArgumentException always
*/
@Override
|
||||
protected Pipe makePipe() {
|
||||
throw new EqlIllegalArgumentException("Wildcard.makePipe() should not be called directly");
|
||||
}
|
||||
|
||||
/**
* Not supported on this node: always throws.
* NOTE(review): presumably the node is expected to have been rewritten via {@link #asLikes()}
* before any script is generated - confirm.
*
* @throws EqlIllegalArgumentException always
*/
@Override
|
||||
public ScriptTemplate asScript() {
|
||||
throw new EqlIllegalArgumentException("Wildcard.asScript() should not be called directly");
|
||||
}
|
||||
|
||||
/**
* Rewrites this function as LIKE predicates.
* Each pattern is folded to its string form, converted with {@code StringUtils.toLikePattern} and wrapped
* in a {@code Like} over the field. A single pattern yields that {@code Like}; several patterns are
* chained left to right with {@code Or}.
*
* @return the equivalent expression, or {@code null} if the pattern list is empty (nothing is ever assigned)
*/
public ScalarFunction asLikes() {
|
||||
ScalarFunction result = null;
|
||||

||||
for (Expression pattern: patterns) {
|
||||
String wcString = pattern.fold().toString();
|
||||
Like like = new Like(source(), field, StringUtils.toLikePattern(wcString));
|
||||
result = result == null ? like : new Or(source(), result, like);
|
||||
}
|
||||

||||
return result;
|
||||
}
|
||||
}
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
package org.elasticsearch.xpack.eql.optimizer;
|
||||
|
||||
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Wildcard;
|
||||
import org.elasticsearch.xpack.eql.util.StringUtils;
|
||||
import org.elasticsearch.xpack.ql.expression.Expression;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.logical.Not;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.nulls.IsNotNull;
|
||||
|
@ -14,7 +16,6 @@ import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Binar
|
|||
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.Equals;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.NotEquals;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.regex.Like;
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.regex.LikePattern;
|
||||
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanLiteralsOnTheRight;
|
||||
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanSimplification;
|
||||
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineBinaryComparisons;
|
||||
|
@ -48,6 +49,7 @@ public class Optimizer extends RuleExecutor<LogicalPlan> {
|
|||
new ReplaceNullChecks(),
|
||||
new PropagateEquals(),
|
||||
new CombineBinaryComparisons(),
|
||||
new ReplaceWildcardFunction(),
|
||||
// prune/elimination
|
||||
new PruneFilters(),
|
||||
new PruneLiteralsInOrderBy()
|
||||
|
@ -60,6 +62,14 @@ public class Optimizer extends RuleExecutor<LogicalPlan> {
|
|||
}
|
||||
|
||||
|
||||
/**
* Optimizer rule for filters: every {@code Wildcard} function found in the filter's expressions is replaced
* by the LIKE/OR expression returned from {@code Wildcard.asLikes()}; all other expressions are left as they are.
*/
private static class ReplaceWildcardFunction extends OptimizerRule<Filter> {
|
||||

||||
@Override
|
||||
protected LogicalPlan rule(Filter filter) {
|
||||
return filter.transformExpressionsUp(e -> e instanceof Wildcard ? ((Wildcard) e).asLikes() : e);
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReplaceWildcards extends OptimizerRule<Filter> {
|
||||
|
||||
private static boolean isWildcard(Expression expr) {
|
||||
|
@ -70,18 +80,6 @@ public class Optimizer extends RuleExecutor<LogicalPlan> {
|
|||
return false;
|
||||
}
|
||||
|
||||
private static LikePattern toLikePattern(String s) {
|
||||
// pick a character that is guaranteed not to be in the string, because it isn't allowed to escape itself
|
||||
char escape = 1;
|
||||
|
||||
// replace wildcards with % and escape special characters
|
||||
String likeString = s.replace("%", escape + "%")
|
||||
.replace("_", escape + "_")
|
||||
.replace("*", "%");
|
||||
|
||||
return new LikePattern(likeString, escape);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected LogicalPlan rule(Filter filter) {
|
||||
return filter.transformExpressionsUp(e -> {
|
||||
|
@ -91,7 +89,7 @@ public class Optimizer extends RuleExecutor<LogicalPlan> {
|
|||
|
||||
if (isWildcard(cmp.right())) {
|
||||
String wcString = cmp.right().fold().toString();
|
||||
Expression like = new Like(e.source(), cmp.left(), toLikePattern(wcString));
|
||||
Expression like = new Like(e.source(), cmp.left(), StringUtils.toLikePattern(wcString));
|
||||
|
||||
if (e instanceof NotEquals) {
|
||||
like = new Not(e.source(), like);
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.eql.util;
|
||||
|
||||
import org.elasticsearch.xpack.ql.expression.predicate.regex.LikePattern;
|
||||
|
||||
/**
* Static string helpers for EQL; not instantiable.
*/
public final class StringUtils {
|
||||

||||
private StringUtils() {}
|
||||

||||
/**
* Convert an EQL wildcard string to a LikePattern.
* Literal {@code %} and {@code _} in the input are prefixed with the escape character so they stay literal,
* then every {@code *} becomes the LIKE wildcard {@code %}.
*
* @param s the EQL wildcard string, where {@code *} is the wildcard
* @return a LikePattern built from the converted string and the escape character (char value 1)
*/
|
||||
public static LikePattern toLikePattern(String s) {
|
||||
// use char value 1 as the escape character; it is assumed never to occur in the input (this is not checked here),
// which matters because the escape character isn't allowed to escape itself
|
||||
char escape = 1;
|
||||

||||
// escape the LIKE special characters first, then turn * into %, so the % produced from * is not escaped
|
||||
String likeString = s.replace("%", escape + "%")
|
||||
.replace("_", escape + "_")
|
||||
.replace("*", "%");
|
||||

||||
return new LikePattern(likeString, escape);
|
||||
}
|
||||
}
|
|
@ -52,7 +52,7 @@ public class ExpressionTests extends ESTestCase {
|
|||
}
|
||||
|
||||
|
||||
public void testStrings() throws Exception {
|
||||
public void testStrings() {
|
||||
assertEquals("hello\"world", unquoteString("'hello\"world'"));
|
||||
assertEquals("hello'world", unquoteString("\"hello'world\""));
|
||||
assertEquals("hello\nworld", unquoteString("'hello\\nworld'"));
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
package org.elasticsearch.xpack.eql.planner;
|
||||
|
||||
import org.elasticsearch.xpack.eql.analysis.VerificationException;
|
||||
import org.elasticsearch.xpack.ql.ParsingException;
|
||||
import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
|
||||
|
||||
public class QueryFolderFailTests extends AbstractQueryFolderTestCase {
|
||||
|
@ -48,4 +49,35 @@ public class QueryFolderFailTests extends AbstractQueryFolderTestCase {
|
|||
assertEquals("Found 1 problem\nline 1:15: [startsWith(plain_text, \"foo\")] cannot operate on first argument field of data type "
|
||||
+ "[text]: No keyword/multi-field defined exact matches for [plain_text]; define one or use MATCH/QUERY instead", msg);
|
||||
}
|
||||
|
||||
// wildcard() called with only the field (no pattern) must be rejected with a ParsingException
// carrying the "expects at least two arguments" message.
public void testWildcardNotEnoughArguments() {
|
||||
ParsingException e = expectThrows(ParsingException.class,
|
||||
() -> plan("process where wildcard(process_name)"));
|
||||
String msg = e.getMessage();
|
||||
assertEquals("line 1:16: error building [wildcard]: expects at least two arguments", msg);
|
||||
}
|
||||
|
||||
// A pattern argument that is a field reference rather than a constant must fail verification
// with the "must be a constant" message.
public void testWildcardAgainstVariable() {
|
||||
VerificationException e = expectThrows(VerificationException.class,
|
||||
() -> plan("process where wildcard(process_name, parent_process_name)"));
|
||||
String msg = e.getMessage();
|
||||
assertEquals("Found 1 problem\nline 1:15: second argument of [wildcard(process_name, parent_process_name)] " +
|
||||
"must be a constant, received [parent_process_name]", msg);
|
||||
}
|
||||
|
||||
// A constant but non-string pattern (the integer 1) must fail verification with the "must be [string]" message.
public void testWildcardWithNumericPattern() {
|
||||
VerificationException e = expectThrows(VerificationException.class,
|
||||
() -> plan("process where wildcard(process_name, 1)"));
|
||||
String msg = e.getMessage();
|
||||
assertEquals("Found 1 problem\n" +
|
||||
"line 1:15: second argument of [wildcard(process_name, 1)] must be [string], found value [1] type [integer]", msg);
|
||||
}
|
||||
|
||||
// A non-string field as the first argument (pid, reported as type long) must fail verification
// with the "must be [string]" message.
public void testWildcardWithNumericField() {
|
||||
VerificationException e = expectThrows(VerificationException.class,
|
||||
() -> plan("process where wildcard(pid, '*.exe')"));
|
||||
String msg = e.getMessage();
|
||||
assertEquals("Found 1 problem\n" +
|
||||
"line 1:15: first argument of [wildcard(pid, '*.exe')] must be [string], found value [pid] type [long]", msg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,3 +102,21 @@ process where substring(file_name, -4) == '.exe'
|
|||
InternalEqlScriptUtils.substring(InternalQlScriptUtils.docValue(doc,params.v0),params.v1,params.v2),params.v3))",
|
||||
"params":{"v0":"file_name.keyword","v1":-4,"v2":null,"v3":".exe"}
|
||||
|
||||
|
||||
wildcardFunctionSingleArgument
|
||||
process where wildcard(process_path, "*\\red_ttp\\wininit.*")
|
||||
"wildcard":{"process_path":{"wildcard":"*\\\\red_ttp\\\\wininit.*"
|
||||
|
||||
|
||||
wildcardFunctionTwoArguments
|
||||
process where wildcard(process_path, "*\\red_ttp\\wininit.*", "*\\abc\\*")
|
||||
"wildcard":{"process_path":{"wildcard":"*\\\\red_ttp\\\\wininit.*"
|
||||
"wildcard":{"process_path":{"wildcard":"*\\\\abc\\\\*"
|
||||
|
||||
|
||||
wildcardFunctionThreeArguments
|
||||
process where wildcard(process_path, "*\\red_ttp\\wininit.*", "*\\abc\\*", "*def*")
|
||||
"wildcard":{"process_path":{"wildcard":"*\\\\red_ttp\\\\wininit.*"
|
||||
"wildcard":{"process_path":{"wildcard":"*\\\\abc\\\\*"
|
||||
"wildcard":{"process_path":{"wildcard":"*def*"
|
||||
|
||||
|
|
|
@ -31,9 +31,20 @@ public final class Expressions {
|
|||
FIRST,
|
||||
SECOND,
|
||||
THIRD,
|
||||
FOURTH
|
||||
FOURTH;
|
||||
|
||||
/**
* Maps a zero-based argument index to its ordinal.
*
* @param index zero-based position of the argument
* @return FIRST..FOURTH for indices 0..3, and DEFAULT for any other value (including negative ones)
*/
public static ParamOrdinal fromIndex(int index) {
|
||||
switch (index) {
|
||||
case 0: return ParamOrdinal.FIRST;
|
||||
case 1: return ParamOrdinal.SECOND;
|
||||
case 2: return ParamOrdinal.THIRD;
|
||||
case 3: return ParamOrdinal.FOURTH;
|
||||
default: return ParamOrdinal.DEFAULT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Expressions() {}
|
||||
|
||||
public static NamedExpression wrapAsNamed(Expression exp) {
|
||||
|
@ -205,4 +216,4 @@ public final class Expressions {
|
|||
public static String id(Expression e) {
|
||||
return Integer.toHexString(e.hashCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -421,4 +421,26 @@ public class FunctionRegistry {
|
|||
protected interface CastFunctionBuilder<T> {
|
||||
T build(Source source, Expression expression, DataType dataType);
|
||||
}
|
||||
|
||||
/**
* Builds a definition for a variadic function whose constructor takes a first expression plus a list of
* the remaining expressions.
* The generated builder requires at least two arguments, or at least one when the function class
* implements {@code OptionalArgument}, and rejects DISTINCT.
* The first child is passed to the constructor on its own; the rest (possibly empty) are passed as a sub-list.
*
* @param function the function class being registered
* @param ctorRef  constructor reference of the form (source, first, remaining)
* @param names    names the definition is created with
* @return the definition created by the delegate {@code def(function, builder, false, names)}
*/
@SuppressWarnings("overloads") // These are ambiguous if you aren't using ctor references but we always do
|
||||
public static <T extends Function> FunctionDefinition def(Class<T> function,
|
||||
TwoParametersVariadicBuilder<T> ctorRef, String... names) {
|
||||
FunctionBuilder builder = (source, children, distinct, cfg) -> {
|
||||
boolean hasMinimumOne = OptionalArgument.class.isAssignableFrom(function);
|
||||
if (hasMinimumOne && children.size() < 1) {
|
||||
throw new QlIllegalArgumentException("expects at least one argument");
|
||||
} else if (!hasMinimumOne && children.size() < 2) {
|
||||
throw new QlIllegalArgumentException("expects at least two arguments");
|
||||
}
|
||||
if (distinct) {
|
||||
throw new QlIllegalArgumentException("does not support DISTINCT yet it was specified");
|
||||
}
|
||||
return ctorRef.build(source, children.get(0), children.subList(1, children.size()));
|
||||
};
|
||||
return def(function, builder, false, names);
|
||||
}
|
||||
|
||||
/**
* Constructor shape for variadic functions: a source, the first argument, and a list of all remaining arguments.
*/
protected interface TwoParametersVariadicBuilder<T> {
|
||||
T build(Source source, Expression src, List<Expression> remaining);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue