SQL: Fix translation of LIKE/RLIKE keywords (#36672)

* SQL: Fix translation of LIKE/RLIKE keywords

Refactor Like/RLike functions to simplify internals and improve query
 translation when chained or within a script context.

Fix #36039
Fix #36584
This commit is contained in:
Costin Leau 2018-12-17 18:58:06 +02:00 committed by GitHub
parent 4103d3b9ec
commit 6d9d5e397b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 169 additions and 165 deletions

View File

@ -280,6 +280,8 @@ aggMaxWithAlias
SELECT gender g, MAX(emp_no) m FROM "test_emp" GROUP BY g ORDER BY gender;
aggMaxOnDate
SELECT gender, MAX(birth_date) m FROM "test_emp" GROUP BY gender ORDER BY gender;
aggAvgAndMaxWithLikeFilter
SELECT CAST(AVG(salary) AS LONG) AS avg, CAST(SUM(salary) AS LONG) AS s FROM "test_emp" WHERE first_name LIKE 'G%';
// Conditional MAX
aggMaxWithHaving

View File

@ -119,7 +119,10 @@ SELECT DAY_OF_WEEK(birth_date) day, COUNT(*) c FROM test_emp WHERE DAY_OF_WEEK(b
currentTimestampYear
SELECT YEAR(CURRENT_TIMESTAMP()) AS result;
currentTimestampMonth
//
// H2 uses the local timezone instead of the specified one
//
currentTimestampMonth-Ignore
SELECT MONTH(CURRENT_TIMESTAMP()) AS result;
currentTimestampHour-Ignore

View File

@ -49,6 +49,8 @@ whereFieldWithNotEqualsOnString
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND gender <> 'M';
whereFieldWithLikeMatch
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name LIKE 'K%';
whereFieldWithNotLikeMatch
SELECT last_name l FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT LIKE 'Ma%';
whereFieldWithOrderNot
SELECT last_name l FROM "test_emp" WHERE NOT emp_no < 10003 ORDER BY emp_no LIMIT 5;

View File

@ -165,6 +165,7 @@ public final class InternalSqlScriptUtils {
// Regex
//
public static Boolean regex(String value, String pattern) {
// TODO: this needs to be improved to avoid creating the pattern on every call
return RegexOperation.match(value, pattern);
}

View File

@ -11,26 +11,24 @@ import org.elasticsearch.xpack.sql.tree.NodeInfo;
public class Like extends RegexMatch {
public Like(Location location, Expression left, LikePattern right) {
super(location, left, right);
private final LikePattern pattern;
public Like(Location location, Expression left, LikePattern pattern) {
super(location, left, pattern.asJavaRegex());
this.pattern = pattern;
}
public LikePattern pattern() {
return pattern;
}
@Override
protected NodeInfo<Like> info() {
return NodeInfo.create(this, Like::new, left(), pattern());
}
public LikePattern pattern() {
return (LikePattern) right();
return NodeInfo.create(this, Like::new, field(), pattern);
}
@Override
protected Like replaceChildren(Expression newLeft, Expression newRight) {
return new Like(location(), newLeft, (LikePattern) newRight);
}
@Override
protected String asString(Expression pattern) {
return ((LikePattern) pattern).asJavaRegex();
protected Like replaceChild(Expression newLeft) {
return new Like(location(), newLeft, pattern);
}
}

View File

@ -5,10 +5,6 @@
*/
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.LeafExpression;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.util.StringUtils;
import java.util.Objects;
@ -21,7 +17,7 @@ import java.util.Objects;
*
* To prevent conflicts with ES, the string and char must be validated to not contain '*'.
*/
public class LikePattern extends LeafExpression {
public class LikePattern {
private final String pattern;
private final char escape;
@ -30,8 +26,7 @@ public class LikePattern extends LeafExpression {
private final String wildcard;
private final String indexNameWildcard;
public LikePattern(Location location, String pattern, char escape) {
super(location);
public LikePattern(String pattern, char escape) {
this.pattern = pattern;
this.escape = escape;
// early initialization to force string validation
@ -40,11 +35,6 @@ public class LikePattern extends LeafExpression {
this.indexNameWildcard = StringUtils.likeToIndexWildcard(pattern, escape);
}
@Override
protected NodeInfo<LikePattern> info() {
return NodeInfo.create(this, LikePattern::new, pattern, escape);
}
public String pattern() {
return pattern;
}
@ -74,16 +64,6 @@ public class LikePattern extends LeafExpression {
return indexNameWildcard;
}
@Override
public boolean nullable() {
return false;
}
@Override
public DataType dataType() {
return DataType.KEYWORD;
}
@Override
public int hashCode() {
return Objects.hash(pattern, escape);

View File

@ -6,28 +6,29 @@
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.Literal;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
public class RLike extends RegexMatch {
public RLike(Location location, Expression left, Literal right) {
super(location, left, right);
private final String pattern;
public RLike(Location location, Expression left, String pattern) {
super(location, left, pattern);
this.pattern = pattern;
}
public String pattern() {
return pattern;
}
@Override
protected NodeInfo<RLike> info() {
return NodeInfo.create(this, RLike::new, left(), (Literal) right());
return NodeInfo.create(this, RLike::new, field(), pattern);
}
@Override
protected RLike replaceChildren(Expression newLeft, Expression newRight) {
return new RLike(location(), newLeft, (Literal) newRight);
}
@Override
protected String asString(Expression pattern) {
return pattern.fold().toString();
protected RLike replaceChild(Expression newChild) {
return new RLike(location(), newChild, pattern);
}
}

View File

@ -7,15 +7,19 @@
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.predicate.BinaryPredicate;
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.type.DataType;
public abstract class RegexMatch extends BinaryPredicate<String, String, Boolean, RegexOperation> {
public abstract class RegexMatch extends UnaryScalarFunction {
protected RegexMatch(Location location, Expression value, Expression pattern) {
super(location, value, pattern, RegexOperation.INSTANCE);
private final String pattern;
protected RegexMatch(Location location, Expression value, String pattern) {
super(location, value);
this.pattern = pattern;
}
@Override
@ -23,18 +27,25 @@ public abstract class RegexMatch extends BinaryPredicate<String, String, Boolean
return DataType.BOOLEAN;
}
@Override
public boolean nullable() {
return field().nullable() && pattern != null;
}
@Override
public boolean foldable() {
// right() is not directly foldable in any context but Like can fold it.
return left().foldable();
return field().foldable();
}
@Override
public Boolean fold() {
Object val = left().fold();
val = val != null ? val.toString() : val;
return function().apply((String) val, asString(right()));
Object val = field().fold();
return RegexOperation.match(val, pattern);
}
protected abstract String asString(Expression pattern);
@Override
protected Processor makeProcessor() {
return new RegexProcessor(pattern);
}
}

View File

@ -1,34 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.BinaryPipe;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
public class RegexPipe extends BinaryPipe {
public RegexPipe(Location location, Expression expression, Pipe left, Pipe right) {
super(location, expression, left, right);
}
@Override
protected NodeInfo<RegexPipe> info() {
return NodeInfo.create(this, RegexPipe::new, expression(), left(), right());
}
@Override
protected BinaryPipe replaceChildren(Pipe left, Pipe right) {
return new RegexPipe(location(), expression(), left, right);
}
@Override
public RegexProcessor asProcessor() {
return new RegexProcessor(left().asProcessor(), right().asProcessor());
}
}

View File

@ -7,66 +7,47 @@ package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.gen.processor.BinaryProcessor;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.expression.predicate.PredicateBiFunction;
import java.io.IOException;
import java.util.Objects;
import java.util.regex.Pattern;
public class RegexProcessor extends BinaryProcessor {
public class RegexProcessor implements Processor {
public static class RegexOperation implements PredicateBiFunction<String, String, Boolean> {
public static class RegexOperation {
public static final RegexOperation INSTANCE = new RegexOperation();
public static Boolean match(Object value, Pattern pattern) {
if (pattern == null) {
return Boolean.TRUE;
}
@Override
public String name() {
return symbol();
}
@Override
public String symbol() {
return "REGEX";
}
@Override
public Boolean doApply(String value, String pattern) {
return match(value, pattern);
}
public static Boolean match(Object value, Object pattern) {
if (value == null || pattern == null) {
if (value == null) {
return null;
}
Pattern p = Pattern.compile(pattern.toString());
return p.matcher(value.toString()).matches();
return pattern.matcher(value.toString()).matches();
}
public static Boolean match(Object value, String pattern) {
if (pattern == null) {
return Boolean.TRUE;
}
if (value == null) {
return null;
}
return Pattern.compile(pattern).matcher(value.toString()).matches();
}
}
public static final String NAME = "rgx";
public RegexProcessor(Processor value, Processor pattern) {
super(value, pattern);
}
private Pattern pattern;
public RegexProcessor(StreamInput in) throws IOException {
super(in);
}
@Override
protected Boolean doProcess(Object value, Object pattern) {
return RegexOperation.match(value, pattern);
}
@Override
protected void checkParameter(Object param) {
if (!(param instanceof String || param instanceof Character)) {
throw new SqlIllegalArgumentException("A string/char is required; received [{}]", param);
}
public RegexProcessor(String pattern) {
this.pattern = pattern != null ? Pattern.compile(pattern) : null;
}
@Override
@ -74,12 +55,23 @@ public class RegexProcessor extends BinaryProcessor {
return NAME;
}
public RegexProcessor(StreamInput in) throws IOException {
this(in.readOptionalString());
}
@Override
protected void doWrite(StreamOutput out) throws IOException {}
public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalString(pattern != null ? pattern.toString() : null);
}
@Override
public Object process(Object input) {
return RegexOperation.match(input, pattern);
}
@Override
public int hashCode() {
return Objects.hash(left(), right());
return Objects.hash(pattern);
}
@Override
@ -93,6 +85,6 @@ public class RegexProcessor extends BinaryProcessor {
}
RegexProcessor other = (RegexProcessor) obj;
return Objects.equals(left(), other.left()) && Objects.equals(right(), other.right());
return Objects.equals(pattern, other.pattern);
}
}

View File

@ -232,7 +232,7 @@ abstract class ExpressionBuilder extends IdentifierBuilder {
e = new Like(loc, exp, visitPattern(pCtx.pattern()));
break;
case SqlBaseParser.RLIKE:
e = new RLike(loc, exp, new Literal(source(pCtx.regex), string(pCtx.regex), DataType.KEYWORD));
e = new RLike(loc, exp, string(pCtx.regex));
break;
case SqlBaseParser.NULL:
// shortcut to avoid double negation later on (since there's no IsNull (missing in ES is a negated exists))
@ -301,7 +301,7 @@ abstract class ExpressionBuilder extends IdentifierBuilder {
}
}
return new LikePattern(source(ctx), pattern, escape);
return new LikePattern(pattern, escape);
}

View File

@ -33,6 +33,7 @@ import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeFunction;
import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction;
import org.elasticsearch.xpack.sql.expression.gen.script.ScriptTemplate;
import org.elasticsearch.xpack.sql.expression.literal.Intervals;
import org.elasticsearch.xpack.sql.expression.predicate.Range;
import org.elasticsearch.xpack.sql.expression.predicate.fulltext.MatchQueryPredicate;
@ -103,7 +104,6 @@ import java.util.function.Supplier;
import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.sql.expression.Foldables.doubleValuesOf;
import static org.elasticsearch.xpack.sql.expression.Foldables.stringValueOf;
import static org.elasticsearch.xpack.sql.expression.Foldables.valueOf;
final class QueryTranslator {
@ -121,7 +121,8 @@ final class QueryTranslator {
new Likes(),
new StringQueries(),
new Matches(),
new MultiMatches()
new MultiMatches(),
new Scalars()
);
private static final List<AggTranslator<?>> AGG_TRANSLATORS = Arrays.asList(
@ -447,13 +448,13 @@ final class QueryTranslator {
boolean inexact = true;
String target = null;
if (e.left() instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) e.left();
if (e.field() instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) e.field();
inexact = fa.isInexact();
target = nameOf(inexact ? fa : fa.exactAttribute());
} else {
throw new SqlIllegalArgumentException("Scalar function ({}) not allowed (yet) as arguments for LIKE",
Expressions.name(e.left()));
Expressions.name(e.field()));
}
if (e instanceof Like) {
@ -462,21 +463,21 @@ final class QueryTranslator {
q = new QueryStringQuery(e.location(), p.asLuceneWildcard(), target);
}
else {
q = new WildcardQuery(e.location(), nameOf(e.left()), p.asLuceneWildcard());
q = new WildcardQuery(e.location(), nameOf(e.field()), p.asLuceneWildcard());
}
}
if (e instanceof RLike) {
String pattern = stringValueOf(e.right());
String pattern = ((RLike) e).pattern();
if (inexact) {
q = new QueryStringQuery(e.location(), "/" + pattern + "/", target);
}
else {
q = new RegexQuery(e.location(), nameOf(e.left()), pattern);
q = new RegexQuery(e.location(), nameOf(e.field()), pattern);
}
}
return q != null ? new QueryTranslation(wrapIfNested(q, e.left())) : null;
return q != null ? new QueryTranslation(wrapIfNested(q, e.field())) : null;
}
}
@ -529,8 +530,16 @@ final class QueryTranslator {
if (onAggs) {
aggFilter = new AggFilter(not.id().toString(), not.asScript());
} else {
query = handleQuery(not, not.field(),
() -> new NotQuery(not.location(), toQuery(not.field(), false).query));
Expression e = not.field();
Query wrappedQuery = toQuery(not.field(), false).query;
Query q = wrappedQuery instanceof ScriptQuery ? new ScriptQuery(not.location(),
not.asScript()) : new NotQuery(not.location(), wrappedQuery);
if (e instanceof FieldAttribute) {
query = wrapIfNested(q, e);
}
query = q;
}
return new QueryTranslation(query, aggFilter);
@ -547,8 +556,14 @@ final class QueryTranslator {
if (onAggs) {
aggFilter = new AggFilter(isNotNull.id().toString(), isNotNull.asScript());
} else {
query = handleQuery(isNotNull, isNotNull.field(),
() -> new ExistsQuery(isNotNull.location(), nameOf(isNotNull.field())));
Query q = null;
if (isNotNull.field() instanceof FieldAttribute) {
q = new ExistsQuery(isNotNull.location(), nameOf(isNotNull.field()));
} else {
q = new ScriptQuery(isNotNull.location(), isNotNull.asScript());
}
final Query qu = q;
query = handleQuery(isNotNull, isNotNull.field(), () -> qu);
}
return new QueryTranslation(query, aggFilter);
@ -565,8 +580,15 @@ final class QueryTranslator {
if (onAggs) {
aggFilter = new AggFilter(isNull.id().toString(), isNull.asScript());
} else {
query = handleQuery(isNull, isNull.field(),
() -> new NotQuery(isNull.location(), new ExistsQuery(isNull.location(), nameOf(isNull.field()))));
Query q = null;
if (isNull.field() instanceof FieldAttribute) {
q = new NotQuery(isNull.location(), new ExistsQuery(isNull.location(), nameOf(isNull.field())));
} else {
q = new ScriptQuery(isNull.location(), isNull.asScript());
}
final Query qu = q;
query = handleQuery(isNull, isNull.field(), () -> qu);
}
return new QueryTranslation(query, aggFilter);
@ -678,7 +700,14 @@ final class QueryTranslator {
aggFilter = new AggFilter(at.id().toString(), in.asScript());
}
else {
query = handleQuery(in, ne, () -> new TermsQuery(in.location(), ne.name(), in.list()));
Query q = null;
if (in.value() instanceof FieldAttribute) {
q = new TermsQuery(in.location(), ne.name(), in.list());
} else {
q = new ScriptQuery(in.location(), in.asScript());
}
Query qu = q;
query = handleQuery(in, ne, () -> qu);
}
return new QueryTranslation(query, aggFilter);
}
@ -719,6 +748,25 @@ final class QueryTranslator {
}
}
}
static class Scalars extends ExpressionTranslator<ScalarFunction> {
@Override
protected QueryTranslation asQuery(ScalarFunction f, boolean onAggs) {
ScriptTemplate script = f.asScript();
Query query = null;
AggFilter aggFilter = null;
if (onAggs) {
aggFilter = new AggFilter(f.id().toString(), script);
} else {
query = handleQuery(f, f, () -> new ScriptQuery(f.location(), script));
}
return new QueryTranslation(query, aggFilter);
}
}
//
@ -862,8 +910,9 @@ final class QueryTranslator {
protected static Query handleQuery(ScalarFunction sf, Expression field, Supplier<Query> query) {
Query q = query.get();
if (field instanceof FieldAttribute) {
return wrapIfNested(query.get(), field);
return wrapIfNested(q, field);
}
return new ScriptQuery(sf.location(), sf.asScript());
}

View File

@ -322,10 +322,10 @@ public class OptimizerTests extends ESTestCase {
public void testConstantFoldingLikes() {
assertEquals(Literal.TRUE,
new ConstantFolding().rule(new Like(EMPTY, Literal.of(EMPTY, "test_emp"), new LikePattern(EMPTY, "test%", (char) 0)))
new ConstantFolding().rule(new Like(EMPTY, Literal.of(EMPTY, "test_emp"), new LikePattern("test%", (char) 0)))
.canonical());
assertEquals(Literal.TRUE,
new ConstantFolding().rule(new RLike(EMPTY, Literal.of(EMPTY, "test_emp"), Literal.of(EMPTY, "test.emp"))).canonical());
new ConstantFolding().rule(new RLike(EMPTY, Literal.of(EMPTY, "test_emp"), "test.emp")).canonical());
}
public void testConstantFoldingDatetime() {
@ -419,7 +419,7 @@ public class OptimizerTests extends ESTestCase {
// comparison
assertNullLiteral(rule.rule(new GreaterThan(EMPTY, getFieldAttribute(), Literal.NULL)));
// regex
assertNullLiteral(rule.rule(new RLike(EMPTY, getFieldAttribute(), Literal.NULL)));
assertNullLiteral(rule.rule(new RLike(EMPTY, Literal.NULL, "123")));
}
public void testSimplifyCoalesceNulls() {

View File

@ -33,6 +33,7 @@ import org.elasticsearch.xpack.sql.expression.predicate.conditional.IfNull;
import org.elasticsearch.xpack.sql.expression.predicate.fulltext.FullTextPredicate;
import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In;
import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.InPipe;
import org.elasticsearch.xpack.sql.expression.predicate.regex.Like;
import org.elasticsearch.xpack.sql.expression.predicate.regex.LikePattern;
import org.elasticsearch.xpack.sql.tree.NodeTests.ChildrenAreAProperty;
import org.elasticsearch.xpack.sql.tree.NodeTests.Dummy;
@ -449,14 +450,12 @@ public class NodeSubclassTests<T extends B, B extends Node<B>> extends ESTestCas
}
return b.toString();
}
} else if (toBuildClass == LikePattern.class) {
/*
* The pattern and escape character have to be valid together
* so we pick an escape character that isn't used
*/
if (argClass == char.class) {
return randomFrom('\\', '|', '/', '`');
} else if (toBuildClass == Like.class) {
if (argClass == LikePattern.class) {
return new LikePattern(randomAlphaOfLength(16), randomFrom('\\', '|', '/', '`'));
}
} else if (toBuildClass == Histogram.class) {
if (argClass == Expression.class) {
return LiteralTests.randomLiteral();