EQL: implement stringContains function (#54380) (#54923)

This commit is contained in:
Aleksandr Maus 2020-04-07 17:55:13 -04:00 committed by GitHub
parent c7053ef824
commit de381271f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 485 additions and 58 deletions

View File

@ -11,6 +11,7 @@ import org.elasticsearch.xpack.eql.expression.function.scalar.string.EndsWith;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Length;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StartsWith;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Substring;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StringContains;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.Wildcard;
import org.elasticsearch.xpack.ql.expression.function.FunctionDefinition;
import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry;
@ -32,6 +33,7 @@ public class EqlFunctionRegistry extends FunctionRegistry {
def(EndsWith.class, EndsWith::new, "endswith"),
def(Length.class, Length::new, "length"),
def(StartsWith.class, StartsWith::new, "startswith"),
def(StringContains.class, StringContains::new, "stringcontains"),
def(Substring.class, Substring::new, "substring"),
def(Wildcard.class, Wildcard::new, "wildcard"),
}

View File

@ -0,0 +1,116 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.expression.function.scalar.string;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.Expressions;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.ql.expression.gen.pipeline.Pipe;
import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
import org.elasticsearch.xpack.ql.expression.gen.script.Scripts;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.DataTypes;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import static java.lang.String.format;
import static org.elasticsearch.xpack.eql.expression.function.scalar.string.StringContainsFunctionProcessor.doProcess;
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isStringAndExact;
import static org.elasticsearch.xpack.ql.expression.gen.script.ParamsBuilder.paramsBuilder;
/**
 * EQL-specific {@code stringContains(string, substring)} scalar function.
 * <p>
 * Evaluates to a boolean telling whether {@code substring} occurs inside {@code string}.
 * Evaluation is delegated to {@link StringContainsFunctionProcessor#doProcess(Object, Object)},
 * which yields {@code null} when {@code string} is {@code null} and otherwise relies on
 * {@code StringUtils.stringContains}, a case-insensitive check.
 */
public class StringContains extends ScalarFunction {

    /** Expression producing the text that is searched. */
    private final Expression string;
    /** Expression producing the text that is searched for. */
    private final Expression substring;

    public StringContains(Source source, Expression string, Expression substring) {
        super(source, Arrays.asList(string, substring));
        this.string = string;
        this.substring = substring;
    }

    /**
     * Both arguments must be exact strings; the first failing argument determines the
     * reported resolution.
     */
    @Override
    protected TypeResolution resolveType() {
        if (childrenResolved() == false) {
            return new TypeResolution("Unresolved children");
        }

        TypeResolution stringResolution = isStringAndExact(string, sourceText(), Expressions.ParamOrdinal.FIRST);
        // the second argument is only inspected once the first one checks out
        return stringResolution.unresolved()
            ? stringResolution
            : isStringAndExact(substring, sourceText(), Expressions.ParamOrdinal.SECOND);
    }

    @Override
    protected Pipe makePipe() {
        Pipe stringPipe = Expressions.pipe(string);
        Pipe substringPipe = Expressions.pipe(substring);
        return new StringContainsFunctionPipe(source(), this, stringPipe, substringPipe);
    }

    /** Foldable only when both arguments are foldable. */
    @Override
    public boolean foldable() {
        if (string.foldable() == false) {
            return false;
        }
        return substring.foldable();
    }

    @Override
    public Object fold() {
        Object foldedString = string.fold();
        Object foldedSubstring = substring.fold();
        return doProcess(foldedString, foldedSubstring);
    }

    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, StringContains::new, string, substring);
    }

    @Override
    public ScriptTemplate asScript() {
        ScriptTemplate stringScript = asScript(string);
        ScriptTemplate substringScript = asScript(substring);
        return asScriptFrom(stringScript, substringScript);
    }

    /**
     * Combines the scripts of the two arguments into a single
     * {@code {eql}.stringContains(<string>,<substring>)} template carrying the parameters of both.
     */
    protected ScriptTemplate asScriptFrom(ScriptTemplate stringScript, ScriptTemplate substringScript) {
        String template = format(
            Locale.ROOT,
            formatTemplate("{eql}.%s(%s,%s)"),
            "stringContains",
            stringScript.template(),
            substringScript.template()
        );
        return new ScriptTemplate(
            template,
            paramsBuilder().script(stringScript.params()).script(substringScript.params()).build(),
            dataType()
        );
    }

    /** Builds the script for a field argument, parameterised with the name of the field's exact attribute. */
    @Override
    public ScriptTemplate scriptWithField(FieldAttribute field) {
        String exactFieldName = field.exactAttribute().name();
        return new ScriptTemplate(
            processScript(Scripts.DOC_VALUE),
            paramsBuilder().variable(exactFieldName).build(),
            dataType()
        );
    }

    /** The function always produces a boolean. */
    @Override
    public DataType dataType() {
        return DataTypes.BOOLEAN;
    }

    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        int childCount = newChildren.size();
        if (childCount == 2) {
            return new StringContains(source(), newChildren.get(0), newChildren.get(1));
        }
        throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]");
    }
}

View File

@ -0,0 +1,106 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.expression.function.scalar.string;
import org.elasticsearch.xpack.ql.execution.search.QlSourceBuilder;
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.gen.pipeline.Pipe;
import org.elasticsearch.xpack.ql.tree.NodeInfo;
import org.elasticsearch.xpack.ql.tree.Source;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
/**
 * Pipe for the EQL {@code stringContains} function: holds the pipes of its two arguments
 * (the searched string and the substring) and turns them into a
 * {@link StringContainsFunctionProcessor}.
 */
public class StringContainsFunctionPipe extends Pipe {

    private final Pipe string;
    private final Pipe substring;

    public StringContainsFunctionPipe(Source source, Expression expression, Pipe string, Pipe substring) {
        super(source, expression, Arrays.asList(string, substring));
        this.string = string;
        this.substring = substring;
    }

    @Override
    public final Pipe replaceChildren(List<Pipe> newChildren) {
        if (newChildren.size() == 2) {
            return replaceChildren(newChildren.get(0), newChildren.get(1));
        }
        throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]");
    }

    /** Resolves both children; returns this very instance when neither of them changed. */
    @Override
    public final Pipe resolveAttributes(AttributeResolver resolver) {
        Pipe resolvedString = string.resolveAttributes(resolver);
        Pipe resolvedSubstring = substring.resolveAttributes(resolver);
        boolean unchanged = resolvedString == string && resolvedSubstring == substring;
        return unchanged ? this : replaceChildren(resolvedString, resolvedSubstring);
    }

    @Override
    public boolean supportedByAggsOnlyQuery() {
        if (string.supportedByAggsOnlyQuery() == false) {
            return false;
        }
        return substring.supportedByAggsOnlyQuery();
    }

    @Override
    public boolean resolved() {
        if (string.resolved() == false) {
            return false;
        }
        return substring.resolved();
    }

    /** Creates a copy of this pipe (same source and expression) over the given children. */
    protected Pipe replaceChildren(Pipe string, Pipe substring) {
        return new StringContainsFunctionPipe(source(), expression(), string, substring);
    }

    @Override
    public final void collectFields(QlSourceBuilder sourceBuilder) {
        // string first, then substring
        string.collectFields(sourceBuilder);
        substring.collectFields(sourceBuilder);
    }

    @Override
    protected NodeInfo<StringContainsFunctionPipe> info() {
        return NodeInfo.create(this, StringContainsFunctionPipe::new, expression(), string, substring);
    }

    @Override
    public StringContainsFunctionProcessor asProcessor() {
        return new StringContainsFunctionProcessor(string.asProcessor(), substring.asProcessor());
    }

    /** @return the pipe of the searched string */
    public Pipe string() {
        return string;
    }

    /** @return the pipe of the substring being looked up */
    public Pipe substring() {
        return substring;
    }

    @Override
    public int hashCode() {
        return Objects.hash(source(), string(), substring());
    }

    /** Two pipes are equal when they share the exact class, the source and both children. */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj != null && getClass() == obj.getClass()) {
            StringContainsFunctionPipe that = (StringContainsFunctionPipe) obj;
            return Objects.equals(source(), that.source())
                && Objects.equals(string(), that.string())
                && Objects.equals(substring(), that.substring());
        }
        return false;
    }
}

View File

@ -0,0 +1,96 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.expression.function.scalar.string;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xpack.eql.EqlIllegalArgumentException;
import org.elasticsearch.xpack.ql.expression.gen.processor.Processor;
import java.io.IOException;
import java.util.Objects;
/**
 * Processor behind the EQL {@code stringContains} function. It evaluates its two child
 * processors against the input and checks, through {@code StringUtils.stringContains},
 * whether the second value occurs in the first one.
 */
public class StringContainsFunctionProcessor implements Processor {

    /** Name returned by {@link #getWriteableName()}. */
    public static final String NAME = "sstc";

    private final Processor string;
    private final Processor substring;

    public StringContainsFunctionProcessor(Processor string, Processor substring) {
        this.string = string;
        this.substring = substring;
    }

    /** Reads the two child processors in the same order {@link #writeTo(StreamOutput)} writes them. */
    public StringContainsFunctionProcessor(StreamInput in) throws IOException {
        this.string = in.readNamedWriteable(Processor.class);
        this.substring = in.readNamedWriteable(Processor.class);
    }

    @Override
    public final void writeTo(StreamOutput out) throws IOException {
        out.writeNamedWriteable(string);
        out.writeNamedWriteable(substring);
    }

    @Override
    public Object process(Object input) {
        Object stringValue = string.process(input);
        Object substringValue = substring.process(input);
        return doProcess(stringValue, substringValue);
    }

    /**
     * Checks whether {@code substring} occurs in {@code string}.
     *
     * @param string    value to search through; {@code null} short-circuits to a {@code null} result
     * @param substring value to search for
     * @return {@code null} when {@code string} is {@code null}, otherwise the outcome of
     *         {@code StringUtils.stringContains}
     * @throws EqlIllegalArgumentException if {@code string} is neither a {@code String} nor a
     *         {@code Character}, or if {@code substring} is neither of those (which includes {@code null})
     */
    public static Object doProcess(Object string, Object substring) {
        if (string != null) {
            requireStringOrChar(string);
            requireStringOrChar(substring);
            return StringUtils.stringContains(string.toString(), substring.toString());
        }
        return null;
    }

    /** Rejects anything that is not a {@code String} or a {@code Character}, {@code null} included. */
    private static void requireStringOrChar(Object obj) {
        boolean accepted = obj instanceof String || obj instanceof Character;
        if (accepted == false) {
            throw new EqlIllegalArgumentException("A string/char is required; received [{}]", obj);
        }
    }

    protected Processor string() {
        return string;
    }

    public Processor substring() {
        return substring;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj != null && getClass() == obj.getClass()) {
            StringContainsFunctionProcessor that = (StringContainsFunctionProcessor) obj;
            return Objects.equals(string(), that.string())
                && Objects.equals(substring(), that.substring());
        }
        return false;
    }

    @Override
    public int hashCode() {
        return Objects.hash(string(), substring());
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }
}

View File

@ -60,6 +60,23 @@ final class StringUtils {
return string.substring(start, idx);
}
/**
 * Case-insensitive containment check: tells whether {@code substring} occurs in {@code string}
 * once both have been lower-cased with {@link Locale#ROOT}.
 * <p>
 * Unlike {@link String#contains(CharSequence)}, an argument rejected by {@code hasLength}
 * makes the result {@code false} instead of matching or throwing.
 *
 * @param string string to search through.
 * @param substring string to search for.
 * @return {@code true} if both arguments pass {@code hasLength} and the lower-cased
 *         {@code string} contains the lower-cased {@code substring}; {@code false} otherwise.
 */
static boolean stringContains(String string, String substring) {
    if (hasLength(string) && hasLength(substring)) {
        String haystack = string.toLowerCase(Locale.ROOT);
        String needle = substring.toLowerCase(Locale.ROOT);
        return haystack.contains(needle);
    }
    return false;
}
/**
* Returns a substring using the Python slice semantics, meaning
* start and end can be negative
@ -70,7 +87,7 @@ final class StringUtils {
}
int length = string.length();
// handle first negative values
if (start < 0) {
start += length;

View File

@ -127,4 +127,4 @@ public class Substring extends ScalarFunction implements OptionalArgument {
return new Substring(source(), newChildren.get(0), newChildren.get(1), newChildren.get(2));
}
}
}

View File

@ -11,6 +11,7 @@ import org.elasticsearch.xpack.eql.expression.function.scalar.string.EndsWithFun
import org.elasticsearch.xpack.eql.expression.function.scalar.string.LengthFunctionProcessor;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StartsWithFunctionProcessor;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.SubstringFunctionProcessor;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StringContainsFunctionProcessor;
import org.elasticsearch.xpack.ql.expression.function.scalar.whitelist.InternalQlScriptUtils;
/*
@ -38,6 +39,10 @@ public class InternalEqlScriptUtils extends InternalQlScriptUtils {
return (Boolean) StartsWithFunctionProcessor.doProcess(s, pattern);
}
/**
 * Script entry point for {@code stringContains}; forwards to
 * {@link StringContainsFunctionProcessor#doProcess(Object, Object)} and casts its result.
 */
public static Boolean stringContains(String string, String substring) {
    Object outcome = StringContainsFunctionProcessor.doProcess(string, substring);
    return (Boolean) outcome;
}
public static String substring(String s, Number start, Number end) {
return (String) SubstringFunctionProcessor.doProcess(s, start, end);
}

View File

@ -59,5 +59,6 @@ class org.elasticsearch.xpack.eql.expression.function.scalar.whitelist.InternalE
Boolean endsWith(String, String)
Integer length(String)
Boolean startsWith(String, String)
Boolean stringContains(String, String)
String substring(String, Number, Number)
}

View File

@ -101,7 +101,7 @@ public class VerifierTests extends ESTestCase {
// Some functions fail with "Unsupported" message at the parse stage
public void testArrayFunctionsUnsupported() {
assertEquals("1:16: Unknown function [arrayContains]",
assertEquals("1:16: Unknown function [arrayContains], did you mean [stringcontains]?",
error("registry where arrayContains(bytes_written_string_list, 'En')"));
assertEquals("1:16: Unknown function [arraySearch]",
error("registry where arraySearch(bytes_written_string_list, bytes_written_string, true)"));
@ -119,8 +119,6 @@ public class VerifierTests extends ESTestCase {
// Test the known EQL functions that are not supported
public void testFunctionVerificationUnknown() {
assertEquals("1:25: Unknown function [stringContains]",
error("file where opcode=0 and stringContains('ABCDEFGHIexplorer.exeJKLMNOP', file_name)"));
assertEquals("1:25: Unknown function [indexOf]",
error("file where opcode=0 and indexOf(file_name, 'plore') == 2"));
assertEquals("1:15: Unknown function [add]",

View File

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.eql.expression.function.scalar.string;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.eql.EqlIllegalArgumentException;
import org.elasticsearch.xpack.eql.expression.function.scalar.string.StringContainsFunctionProcessor;
import java.util.concurrent.Callable;
import static org.hamcrest.Matchers.equalTo;
/**
 * Unit tests for {@link StringContainsFunctionProcessor#doProcess(Object, Object)},
 * focused on how {@code null} arguments are treated.
 */
public class StringContainsFunctionProcessorTests extends ESTestCase {

    protected static final int NUMBER_OF_TEST_RUNS = 20;

    /** Invokes the given callable {@link #NUMBER_OF_TEST_RUNS} times in a row. */
    protected static void run(Callable<Void> callable) throws Exception {
        int executed = 0;
        while (executed < NUMBER_OF_TEST_RUNS) {
            callable.call();
            executed++;
        }
    }

    public void testNullOrEmptyParameters() throws Exception {
        run(() -> {
            String needle = randomBoolean() ? null : randomAlphaOfLength(10);
            String prefix = randomBoolean() ? null : randomAlphaOfLength(10);
            // when both parts exist, embed the needle between two random chunks so it must be found
            final String haystack = (prefix == null || needle == null)
                ? prefix
                : prefix + needle + randomAlphaOfLength(10);

            if (haystack == null || needle != null) {
                // a null string yields null; otherwise the needle is guaranteed to be contained
                assertThat(StringContainsFunctionProcessor.doProcess(haystack, needle),
                    equalTo(haystack == null ? null : true));
            } else {
                // a non-null string combined with a null substring is rejected
                EqlIllegalArgumentException e = expectThrows(EqlIllegalArgumentException.class,
                    () -> StringContainsFunctionProcessor.doProcess(haystack, needle));
                assertThat(e.getMessage(), equalTo("A string/char is required; received [null]"));
            }
            return null;
        });
    }
}

View File

@ -8,6 +8,7 @@ package org.elasticsearch.xpack.eql.expression.function.scalar.string;
import org.elasticsearch.test.ESTestCase;
import static org.elasticsearch.xpack.eql.expression.function.scalar.string.StringUtils.stringContains;
import static org.elasticsearch.xpack.eql.expression.function.scalar.string.StringUtils.substringSlice;
import static org.elasticsearch.xpack.ql.util.StringUtils.EMPTY;
import static org.hamcrest.Matchers.equalTo;
@ -136,6 +137,17 @@ public class StringUtilsTests extends ESTestCase {
assertThat(StringUtils.between("System Idle Process", "Y", "e", false, true),
equalTo(""));
}
// stringContains must answer false for each null/empty argument pair listed below
public void testStringContainsWithNullOrEmpty() {
    String[][] argumentPairs = {
        { null, null },
        { null, "" },
        { "", null },
    };
    for (String[] pair : argumentPairs) {
        assertFalse(stringContains(pair[0], pair[1]));
    }
}
// a random needle surrounded by random text must always be found
public void testStringContainsWithRandom() throws Exception {
    String needle = randomAlphaOfLength(10);
    String head = randomAlphaOfLength(10);
    String tail = randomAlphaOfLength(10);
    assertTrue(stringContains(head + needle + tail, needle));
}
}

View File

@ -11,6 +11,57 @@ import org.elasticsearch.xpack.ql.ParsingException;
import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
public class QueryFolderFailTests extends AbstractQueryFolderTestCase {
/**
 * Plans {@code query}, expects it to fail with a {@link VerificationException} that reports
 * exactly one problem, and returns the message with the {@code "Found 1 problem\nline "}
 * header removed.
 */
private String error(String query) {
    VerificationException e = expectThrows(VerificationException.class, () -> plan(query));
    final String header = "Found 1 problem\nline ";
    String message = e.getMessage();
    // Check the complete header, not just "Found ": the substring below strips header.length()
    // characters, so a message with a different header (e.g. several problems) would otherwise
    // be cut at the wrong position or trigger a StringIndexOutOfBoundsException.
    assertTrue("unexpected verification message [" + message + "]", message.startsWith(header));
    return message.substring(header.length());
}
/**
 * Plans {@code eql}, expects a {@link ParsingException} whose message starts with
 * {@code "line "} and returns the remainder of that message.
 */
private String errorParsing(String eql) {
    final String header = "line ";
    ParsingException failure = expectThrows(ParsingException.class, () -> plan(eql));
    String message = failure.getMessage();
    assertTrue(message.startsWith(header));
    return message.substring(header.length());
}
// every query below calls between() with an argument count outside three..five
public void testBetweenMissingOrNullParams() {
    final String[] queries = {
        "process where between() == \"yst\"",
        "process where between(process_name) == \"yst\"",
        "process where between(process_name, \"s\") == \"yst\"",
        "process where between(null) == \"yst\"",
        "process where between(process_name, null) == \"yst\"",
        "process where between(process_name, \"s\", \"e\", false, false, true) == \"yst\"",
    };

    for (int i = 0; i < queries.length; i++) {
        final String query = queries[i];
        ParsingException failure = expectThrows(ParsingException.class, () -> plan(query));
        assertEquals("line 1:16: error building [between]: expects between three and five arguments", failure.getMessage());
    }
}
// each row pairs the expected verification error with the query that must produce it
public void testBetweenWrongTypeParams() {
    final String[][] expectations = {
        {
            "1:15: second argument of [between(process_name, 1, 2)] must be [string], found value [1] type [integer]",
            "process where between(process_name, 1, 2)"
        },
        {
            "1:15: third argument of [between(process_name, \"s\", 2)] must be [string], found value [2] type [integer]",
            "process where between(process_name, \"s\", 2)"
        },
        {
            "1:15: fourth argument of [between(process_name, \"s\", \"e\", 1)] must be [boolean], found value [1] type [integer]",
            "process where between(process_name, \"s\", \"e\", 1)"
        },
        {
            "1:15: fourth argument of [between(process_name, \"s\", \"e\", \"true\")] must be [boolean], " +
                "found value [\"true\"] type [keyword]",
            "process where between(process_name, \"s\", \"e\", \"true\")"
        },
        {
            "1:15: fifth argument of [between(process_name, \"s\", \"e\", false, 2)] must be [boolean], " +
                "found value [2] type [integer]",
            "process where between(process_name, \"s\", \"e\", false, 2)"
        },
    };

    for (String[] expectation : expectations) {
        String expectedMessage = expectation[0];
        String query = expectation[1];
        assertEquals(expectedMessage, error(query));
    }
}
public void testPropertyEquationFilterUnsupported() {
QlIllegalArgumentException e = expectThrows(QlIllegalArgumentException.class,
() -> plan("process where (serial_event_id<9 and serial_event_id >= 7) or (opcode == pid)"));
@ -23,7 +74,7 @@ public class QueryFolderFailTests extends AbstractQueryFolderTestCase {
() -> plan("process where opcode in (1,3) and process_name in (parent_process_name, \"SYSTEM\")"));
String msg = e.getMessage();
assertEquals("Found 1 problem\nline 1:35: Comparisons against variables are not (currently) supported; " +
"offender [parent_process_name] in [process_name in (parent_process_name, \"SYSTEM\")]", msg);
"offender [parent_process_name] in [process_name in (parent_process_name, \"SYSTEM\")]", msg);
}
public void testLengthFunctionWithInexact() {
@ -50,79 +101,45 @@ public class QueryFolderFailTests extends AbstractQueryFolderTestCase {
+ "[text]: No keyword/multi-field defined exact matches for [plain_text]; define one or use MATCH/QUERY instead", msg);
}
// stringContains needs exactly two arguments, and the second one must be a string
public void testStringContainsWrongParams() {
    final String wrongArgumentCount = "1:16: error building [stringcontains]: expects exactly two arguments";

    String noArguments = errorParsing("process where stringContains()");
    assertEquals(wrongArgumentCount, noArguments);

    String singleArgument = errorParsing("process where stringContains(process_name)");
    assertEquals(wrongArgumentCount, singleArgument);

    String numericSubstring = error("process where stringContains(process_name, 1)");
    assertEquals("1:15: second argument of [stringContains(process_name, 1)] must be [string], found value [1] type [integer]",
        numericSubstring);
}
public void testWildcardNotEnoughArguments() {
ParsingException e = expectThrows(ParsingException.class,
() -> plan("process where wildcard(process_name)"));
() -> plan("process where wildcard(process_name)"));
String msg = e.getMessage();
assertEquals("line 1:16: error building [wildcard]: expects at least two arguments", msg);
}
public void testWildcardAgainstVariable() {
VerificationException e = expectThrows(VerificationException.class,
() -> plan("process where wildcard(process_name, parent_process_name)"));
() -> plan("process where wildcard(process_name, parent_process_name)"));
String msg = e.getMessage();
assertEquals("Found 1 problem\nline 1:15: second argument of [wildcard(process_name, parent_process_name)] " +
"must be a constant, received [parent_process_name]", msg);
"must be a constant, received [parent_process_name]", msg);
}
public void testWildcardWithNumericPattern() {
VerificationException e = expectThrows(VerificationException.class,
() -> plan("process where wildcard(process_name, 1)"));
() -> plan("process where wildcard(process_name, 1)"));
String msg = e.getMessage();
assertEquals("Found 1 problem\n" +
"line 1:15: second argument of [wildcard(process_name, 1)] must be [string], found value [1] type [integer]", msg);
"line 1:15: second argument of [wildcard(process_name, 1)] must be [string], found value [1] type [integer]", msg);
}
public void testWildcardWithNumericField() {
VerificationException e = expectThrows(VerificationException.class,
() -> plan("process where wildcard(pid, '*.exe')"));
() -> plan("process where wildcard(pid, '*.exe')"));
String msg = e.getMessage();
assertEquals("Found 1 problem\n" +
"line 1:15: first argument of [wildcard(pid, '*.exe')] must be [string], found value [pid] type [long]", msg);
}
public void testBetweenMissingOrNullParams() {
final String[] queries = {
"process where between() == \"yst\"",
"process where between(process_name) == \"yst\"",
"process where between(process_name, \"s\") == \"yst\"",
"process where between(null) == \"yst\"",
"process where between(process_name, null) == \"yst\"",
"process where between(process_name, \"s\", \"e\", false, false, true) == \"yst\"",
};
for (String query : queries) {
ParsingException e = expectThrows(ParsingException.class,
() -> plan(query));
assertEquals("line 1:16: error building [between]: expects between three and five arguments", e.getMessage());
}
}
private String error(String query) {
VerificationException e = expectThrows(VerificationException.class,
() -> plan(query));
assertTrue(e.getMessage().startsWith("Found "));
final String header = "Found 1 problem\nline ";
return e.getMessage().substring(header.length());
}
public void testBetweenWrongTypeParams() {
assertEquals("1:15: second argument of [between(process_name, 1, 2)] must be [string], found value [1] type [integer]",
error("process where between(process_name, 1, 2)"));
assertEquals("1:15: third argument of [between(process_name, \"s\", 2)] must be [string], found value [2] type [integer]",
error("process where between(process_name, \"s\", 2)"));
assertEquals("1:15: fourth argument of [between(process_name, \"s\", \"e\", 1)] must be [boolean], found value [1] type [integer]",
error("process where between(process_name, \"s\", \"e\", 1)"));
assertEquals("1:15: fourth argument of [between(process_name, \"s\", \"e\", \"true\")] must be [boolean], " +
"found value [\"true\"] type [keyword]",
error("process where between(process_name, \"s\", \"e\", \"true\")"));
assertEquals("1:15: fifth argument of [between(process_name, \"s\", \"e\", false, 2)] must be [boolean], " +
"found value [2] type [integer]",
error("process where between(process_name, \"s\", \"e\", false, 2)"));
"line 1:15: first argument of [wildcard(pid, '*.exe')] must be [string], found value [pid] type [long]", msg);
}
}

View File

@ -105,6 +105,14 @@ InternalQlScriptUtils.docValue(doc,params.v0),params.v1))",
"params":{"v0":"user_name","v1":"A"}
;
stringContains
process where stringContains(process_name, "foo")
;
"script":{"source":"InternalQlScriptUtils.nullSafeFilter(InternalEqlScriptUtils.stringContains(
InternalQlScriptUtils.docValue(doc,params.v0),params.v1))"
"params":{"v0":"process_name","v1":"foo"}
;
substringFunction
process where substring(file_name, -4) == '.exe'
;