SQL: Fix RLIKE bug and improve testing for RLIKE statement (#40354)

* Refactor RegexMatch to support both LIKE and RLIKE
* Add integration tests for RLIKE
* Polish the rest of tests

(cherry picked from commit 7562d6eeeb77c04794002649fe726f4b3a9a398b)
This commit is contained in:
Andrei Stefan 2019-03-23 06:36:44 +02:00 committed by Andrei Stefan
parent 421d7654a7
commit 150d1332cf
11 changed files with 226 additions and 58 deletions

View File

@ -31,19 +31,25 @@ public abstract class CsvSpecTestCase extends SpecBaseIntegrationTestCase {
public static List<Object[]> readScriptSpec() throws Exception {
Parser parser = specParser();
List<Object[]> tests = new ArrayList<>();
tests.addAll(readScriptSpec("/select.csv-spec", parser));
tests.addAll(readScriptSpec("/command.csv-spec", parser));
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
tests.addAll(readScriptSpec("/agg.csv-spec", parser));
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
tests.addAll(readScriptSpec("/arithmetic.csv-spec", parser));
tests.addAll(readScriptSpec("/columns.csv-spec", parser));
tests.addAll(readScriptSpec("/command.csv-spec", parser));
//tests.addAll(readScriptSpec("/command-sys.csv-spec", parser));
tests.addAll(readScriptSpec("/date.csv-spec", parser));
tests.addAll(readScriptSpec("/datetime.csv-spec", parser));
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
tests.addAll(readScriptSpec("/datetime-interval.csv-spec", parser));
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
tests.addAll(readScriptSpec("/filter.csv-spec", parser));
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
//tests.addAll(readScriptSpec("/ip.csv-spec", parser));
tests.addAll(readScriptSpec("/math.csv-spec", parser));
tests.addAll(readScriptSpec("/null.csv-spec", parser));
tests.addAll(readScriptSpec("/nested.csv-spec", parser));
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
tests.addAll(readScriptSpec("/math.csv-spec", parser));
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
tests.addAll(readScriptSpec("/select.csv-spec", parser));
return tests;
}

View File

@ -178,10 +178,7 @@ aggCountOnColumnAndMultipleHaving
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender ;
aggCountOnColumnAndMultipleHavingEquals
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c = 63 ORDER BY gender ;
//
// Count(column) = Column(*) which is a bug
// https://github.com/elastic/elasticsearch/issues/34549
//
aggCountOnColumnAndMultipleHavingWithLimit
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender LIMIT 1;
aggCountOnColumnAndHavingBetween-Ignore

View File

@ -5,9 +5,14 @@
// the standard behavior here is to return the constant for each element
// the weird thing is that an actual query needs to be ran
arithmeticWithFrom
SELECT 5 - 2 x FROM test_emp;
SELECT 5 - 2 x FROM test_emp LIMIT 5;
x
x:i
---------------
3
3
3
3
3
;

View File

@ -0,0 +1,119 @@
//
// Filter
//
whereFieldWithRLikeMatch
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name RLIKE 'S.*';
l:s
---------------
Simmel
;
whereFieldWithNotRLikeMatch
SELECT last_name, first_name FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT RLIKE 'Ma.*' ORDER BY first_name LIMIT 5;
last_name:s | first_name:s
---------------+---------------
Preusig |Anneke
Genin |Berni
Simmel |Bezalel
Koblick |Chirstian
Bouloucos |Cristinel
;
whereFieldWithRLikeMatchNot
SELECT last_name AS L, emp_no FROM "test_emp" WHERE NOT (emp_no < 10003 AND L NOT RLIKE 'K.*') ORDER BY emp_no LIMIT 5;
L:s | emp_no:i
---------------+---------------
Bamford |10003
Koblick |10004
Maliniak |10005
Preusig |10006
Zielinski |10007
;
whereFieldOnMatchWithAndAndOr
SELECT last_name l, gender g FROM "test_emp" WHERE (last_name RLIKE 'K.*' OR gender = 'F') AND emp_no < 10050 ORDER BY last_name;
l:s | g:s
---------------+---------------
Casley |F
Kalloufi |M
Koblick |M
Lenart |F
Meriste |F
Montemayor |F
Peac |F
Pettey |F
Preusig |F
Reistad |F
Reistad |F
Simmel |F
Stamatiou |F
Tramer |F
Zielinski |F
;
whereFieldWithRLikeAndGroupByOrderBy
SELECT last_name l, gender g, COUNT(*) c, MAX(salary) AS sal FROM "test_emp" WHERE emp_no < 10050 AND (last_name RLIKE 'B.*' OR gender = 'F') GROUP BY g, l ORDER BY sal;
l:s | g:s | c:l | sal:i
---------------+---------------+---------------+---------------
Berztiss |M |1 |28336
Stamatiou |F |1 |30404
Brender |M |1 |36051
Meriste |F |1 |37112
Tramer |F |1 |37853
Casley |F |1 |39728
Montemayor |F |1 |47896
Bridgland |null |1 |48942
Simmel |F |1 |56371
Lenart |F |1 |56415
Bouloucos |null |1 |58715
Preusig |F |1 |60335
Bamford |M |1 |61805
Pettey |F |1 |64675
Peac |F |1 |66174
Reistad |F |2 |73851
Zielinski |F |1 |74572
;
whereFieldWithRLikeAndNotRLike
SELECT COUNT(*), last_name AS f FROM test_emp WHERE last_name RLIKE '.*o.*' AND last_name NOT RLIKE '.*f.*' GROUP BY f HAVING COUNT(*) > 1;
COUNT(*):l | f:s
---------------+---------------
2 |Lortz
;
whereInlineRLike
SELECT emp_no FROM test_emp WHERE 'aaabbb' RLIKE 'aa+b+' AND 'aaabbb' NOT RLIKE 'a++c+' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
emp_no:i
---------------
10079
10078
10077
10076
10075
;
whereInlineRLikeAndCount_1
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE '.....?.?' AND 'aaabbb' NOT RLIKE 'aa?bb?' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
COUNT(*):l | t:i
---------------+---------------
99 |10000
1 |10100
;
whereInlineRLikeAndCount_2
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE 'a{2,}b{2,}' AND 'aaabbb' NOT RLIKE 'a{4,6}b{4,6}' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
COUNT(*):l | t:i
---------------+---------------
99 |10000
1 |10100
;

View File

@ -51,6 +51,8 @@ whereFieldWithLikeMatch
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name LIKE 'K%';
whereFieldWithNotLikeMatch
SELECT last_name l FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT LIKE 'Ma%';
whereFieldWithInlineLikeMatch
SELECT emp_no FROM "test_emp" WHERE 'aaabbb' LIKE 'aa%b%' AND 'aaabbb' NOT LIKE 'a%%c%' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
whereFieldWithOrderNot
SELECT last_name l FROM "test_emp" WHERE NOT emp_no < 10003 ORDER BY emp_no LIMIT 5;

View File

@ -31,7 +31,7 @@ import org.elasticsearch.xpack.sql.expression.function.UnresolvedFunction;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.sql.expression.function.scalar.Cast;
import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.ArithmeticOperation;
import org.elasticsearch.xpack.sql.expression.predicate.regex.Like;
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexMatch;
import org.elasticsearch.xpack.sql.plan.TableIdentifier;
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
import org.elasticsearch.xpack.sql.plan.logical.EsRelation;
@ -852,8 +852,8 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
// TODO: we should move to always compare the functions directly
// Special check for COUNT: an already seen COUNT function will be returned only if its DISTINCT property
// matches the one from the unresolved function to be checked.
// Same for LIKE: the equals function also compares the pattern of LIKE
if (seenFunction instanceof Count || seenFunction instanceof Like) {
// Same for LIKE/RLIKE: the equals function also compares the pattern of LIKE/RLIKE
if (seenFunction instanceof Count || seenFunction instanceof RegexMatch) {
if (seenFunction.equals(f)){
return seenFunction;
}

View File

@ -6,41 +6,35 @@
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import java.util.Objects;
public class Like extends RegexMatch {
private final LikePattern pattern;
public class Like extends RegexMatch<LikePattern> {
public Like(Source source, Expression left, LikePattern pattern) {
super(source, left, pattern.asJavaRegex());
this.pattern = pattern;
}
public LikePattern pattern() {
return pattern;
super(source, left, pattern);
}
@Override
protected NodeInfo<Like> info() {
return NodeInfo.create(this, Like::new, field(), pattern);
return NodeInfo.create(this, Like::new, field(), pattern());
}
@Override
protected Like replaceChild(Expression newLeft) {
return new Like(source(), newLeft, pattern);
return new Like(source(), newLeft, pattern());
}
@Override
public boolean equals(Object obj) {
return super.equals(obj) && Objects.equals(((Like) obj).pattern(), pattern());
public Boolean fold() {
Object val = field().fold();
return RegexOperation.match(val, pattern().asJavaRegex());
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), pattern());
protected Processor makeProcessor() {
return new RegexProcessor(pattern().asJavaRegex());
}
}

View File

@ -6,29 +6,35 @@
package org.elasticsearch.xpack.sql.expression.predicate.regex;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
public class RLike extends RegexMatch {
public class RLike extends RegexMatch<String> {
private final String pattern;
public RLike(Source source, Expression left, String pattern) {
super(source, left, pattern);
this.pattern = pattern;
}
public String pattern() {
return pattern;
public RLike(Source source, Expression value, String pattern) {
super(source, value, pattern);
}
@Override
protected NodeInfo<RLike> info() {
return NodeInfo.create(this, RLike::new, field(), pattern);
return NodeInfo.create(this, RLike::new, field(), pattern());
}
@Override
protected RLike replaceChild(Expression newChild) {
return new RLike(source(), newChild, pattern);
return new RLike(source(), newChild, pattern());
}
@Override
public Boolean fold() {
Object val = field().fold();
return RegexOperation.match(val, pattern());
}
@Override
protected Processor makeProcessor() {
return new RegexProcessor(pattern());
}
}

View File

@ -10,21 +10,25 @@ import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.expression.Nullability;
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;
import java.util.Objects;
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isStringAndExact;
public abstract class RegexMatch extends UnaryScalarFunction {
public abstract class RegexMatch<T> extends UnaryScalarFunction {
private final String pattern;
protected RegexMatch(Source source, Expression value, String pattern) {
private final T pattern;
protected RegexMatch(Source source, Expression value, T pattern) {
super(source, value);
this.pattern = pattern;
}
public T pattern() {
return pattern;
}
@Override
public DataType dataType() {
@ -33,7 +37,7 @@ public abstract class RegexMatch extends UnaryScalarFunction {
@Override
public Nullability nullable() {
if (pattern == null) {
if (pattern() == null) {
return Nullability.TRUE;
}
return field().nullable();
@ -49,15 +53,14 @@ public abstract class RegexMatch extends UnaryScalarFunction {
// right() is not directly foldable in any context but Like can fold it.
return field().foldable();
}
@Override
public Boolean fold() {
Object val = field().fold();
return RegexOperation.match(val, pattern);
public boolean equals(Object obj) {
return super.equals(obj) && Objects.equals(((RegexMatch<?>) obj).pattern(), pattern());
}
@Override
protected Processor makeProcessor() {
return new RegexProcessor(pattern);
public int hashCode() {
return Objects.hash(super.hashCode(), pattern());
}
}

View File

@ -472,6 +472,7 @@ final class QueryTranslator {
// TODO: need to optimize on ngram
// TODO: see whether escaping is needed
@SuppressWarnings("rawtypes")
static class Likes extends ExpressionTranslator<RegexMatch> {
@Override

View File

@ -228,6 +228,41 @@ public class QueryTranslatorTests extends ESTestCase {
assertEquals(1, rqsq.fields().size());
assertEquals("keyword", rqsq.fields().keySet().iterator().next());
}
public void testRLikePatterns() {
String[] patterns = new String[] {"(...)+", "abab(ab)?", "(ab){1,2}", "(ab){3}", "aabb|bbaa", "a+b+|b+a+", "aa(cc|bb)",
"a{4,6}b{4,6}", ".{3}.{3}", "aaa*bbb*", "a+.+", "a.c.e", "[^abc\\-]"};
for (int i = 0; i < 5; i++) {
assertDifferentRLikeAndNotRLikePatterns(randomFrom(patterns), randomFrom(patterns));
}
}
private void assertDifferentRLikeAndNotRLikePatterns(String firstPattern, String secondPattern) {
LogicalPlan p = plan("SELECT keyword k FROM test WHERE k RLIKE '" + firstPattern + "' AND k NOT RLIKE '" + secondPattern + "'");
assertTrue(p instanceof Project);
p = ((Project) p).child();
assertTrue(p instanceof Filter);
Expression condition = ((Filter) p).condition();
QueryTranslation qt = QueryTranslator.toQuery(condition, false);
assertEquals(BoolQuery.class, qt.query.getClass());
BoolQuery bq = ((BoolQuery) qt.query);
assertTrue(bq.isAnd());
assertTrue(bq.left() instanceof QueryStringQuery);
assertTrue(bq.right() instanceof NotQuery);
NotQuery nq = (NotQuery) bq.right();
assertTrue(nq.child() instanceof QueryStringQuery);
QueryStringQuery lqsq = (QueryStringQuery) bq.left();
QueryStringQuery rqsq = (QueryStringQuery) nq.child();
assertEquals("/" + firstPattern + "/", lqsq.query());
assertEquals(1, lqsq.fields().size());
assertEquals("keyword", lqsq.fields().keySet().iterator().next());
assertEquals("/" + secondPattern + "/", rqsq.query());
assertEquals(1, rqsq.fields().size());
assertEquals("keyword", rqsq.fields().keySet().iterator().next());
}
public void testTranslateNotExpression_WhereClause_Painless() {
LogicalPlan p = plan("SELECT * FROM test WHERE NOT(POSITION('x', keyword) = 0)");