SQL: Fix RLIKE bug and improve testing for RLIKE statement (#40354)
* Refactor RegexMatch to support both LIKE and RLIKE * Add integration tests for RLIKE * Polish the rest of tests (cherry picked from commit 7562d6eeeb77c04794002649fe726f4b3a9a398b)
This commit is contained in:
parent
421d7654a7
commit
150d1332cf
|
@ -31,19 +31,25 @@ public abstract class CsvSpecTestCase extends SpecBaseIntegrationTestCase {
|
|||
public static List<Object[]> readScriptSpec() throws Exception {
|
||||
Parser parser = specParser();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
tests.addAll(readScriptSpec("/select.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/command.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/agg.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/arithmetic.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/columns.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/command.csv-spec", parser));
|
||||
//tests.addAll(readScriptSpec("/command-sys.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/date.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/datetime.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/alias.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/datetime-interval.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/filter.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/fulltext.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
|
||||
//tests.addAll(readScriptSpec("/ip.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/math.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/null.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/nested.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/functions.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/math.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/field-alias.csv-spec", parser));
|
||||
tests.addAll(readScriptSpec("/select.csv-spec", parser));
|
||||
|
||||
return tests;
|
||||
}
|
||||
|
||||
|
|
|
@ -178,10 +178,7 @@ aggCountOnColumnAndMultipleHaving
|
|||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender ;
|
||||
aggCountOnColumnAndMultipleHavingEquals
|
||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c = 63 ORDER BY gender ;
|
||||
//
|
||||
// Count(column) = Column(*) which is a bug
|
||||
// https://github.com/elastic/elasticsearch/issues/34549
|
||||
//
|
||||
|
||||
aggCountOnColumnAndMultipleHavingWithLimit
|
||||
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND c < 70 ORDER BY gender LIMIT 1;
|
||||
aggCountOnColumnAndHavingBetween-Ignore
|
||||
|
|
|
@ -5,9 +5,14 @@
|
|||
// the standard behavior here is to return the constant for each element
|
||||
// the weird thing is that an actual query needs to be ran
|
||||
arithmeticWithFrom
|
||||
SELECT 5 - 2 x FROM test_emp;
|
||||
SELECT 5 - 2 x FROM test_emp LIMIT 5;
|
||||
|
||||
x
|
||||
x:i
|
||||
---------------
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
;
|
||||
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
//
|
||||
// Filter
|
||||
//
|
||||
|
||||
whereFieldWithRLikeMatch
|
||||
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name RLIKE 'S.*';
|
||||
|
||||
l:s
|
||||
---------------
|
||||
Simmel
|
||||
;
|
||||
|
||||
whereFieldWithNotRLikeMatch
|
||||
SELECT last_name, first_name FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT RLIKE 'Ma.*' ORDER BY first_name LIMIT 5;
|
||||
|
||||
last_name:s | first_name:s
|
||||
---------------+---------------
|
||||
Preusig |Anneke
|
||||
Genin |Berni
|
||||
Simmel |Bezalel
|
||||
Koblick |Chirstian
|
||||
Bouloucos |Cristinel
|
||||
;
|
||||
|
||||
whereFieldWithRLikeMatchNot
|
||||
SELECT last_name AS L, emp_no FROM "test_emp" WHERE NOT (emp_no < 10003 AND L NOT RLIKE 'K.*') ORDER BY emp_no LIMIT 5;
|
||||
|
||||
L:s | emp_no:i
|
||||
---------------+---------------
|
||||
Bamford |10003
|
||||
Koblick |10004
|
||||
Maliniak |10005
|
||||
Preusig |10006
|
||||
Zielinski |10007
|
||||
;
|
||||
|
||||
whereFieldOnMatchWithAndAndOr
|
||||
SELECT last_name l, gender g FROM "test_emp" WHERE (last_name RLIKE 'K.*' OR gender = 'F') AND emp_no < 10050 ORDER BY last_name;
|
||||
|
||||
l:s | g:s
|
||||
---------------+---------------
|
||||
Casley |F
|
||||
Kalloufi |M
|
||||
Koblick |M
|
||||
Lenart |F
|
||||
Meriste |F
|
||||
Montemayor |F
|
||||
Peac |F
|
||||
Pettey |F
|
||||
Preusig |F
|
||||
Reistad |F
|
||||
Reistad |F
|
||||
Simmel |F
|
||||
Stamatiou |F
|
||||
Tramer |F
|
||||
Zielinski |F
|
||||
;
|
||||
|
||||
whereFieldWithRLikeAndGroupByOrderBy
|
||||
SELECT last_name l, gender g, COUNT(*) c, MAX(salary) AS sal FROM "test_emp" WHERE emp_no < 10050 AND (last_name RLIKE 'B.*' OR gender = 'F') GROUP BY g, l ORDER BY sal;
|
||||
|
||||
l:s | g:s | c:l | sal:i
|
||||
---------------+---------------+---------------+---------------
|
||||
Berztiss |M |1 |28336
|
||||
Stamatiou |F |1 |30404
|
||||
Brender |M |1 |36051
|
||||
Meriste |F |1 |37112
|
||||
Tramer |F |1 |37853
|
||||
Casley |F |1 |39728
|
||||
Montemayor |F |1 |47896
|
||||
Bridgland |null |1 |48942
|
||||
Simmel |F |1 |56371
|
||||
Lenart |F |1 |56415
|
||||
Bouloucos |null |1 |58715
|
||||
Preusig |F |1 |60335
|
||||
Bamford |M |1 |61805
|
||||
Pettey |F |1 |64675
|
||||
Peac |F |1 |66174
|
||||
Reistad |F |2 |73851
|
||||
Zielinski |F |1 |74572
|
||||
;
|
||||
|
||||
whereFieldWithRLikeAndNotRLike
|
||||
SELECT COUNT(*), last_name AS f FROM test_emp WHERE last_name RLIKE '.*o.*' AND last_name NOT RLIKE '.*f.*' GROUP BY f HAVING COUNT(*) > 1;
|
||||
|
||||
COUNT(*):l | f:s
|
||||
---------------+---------------
|
||||
2 |Lortz
|
||||
;
|
||||
|
||||
whereInlineRLike
|
||||
SELECT emp_no FROM test_emp WHERE 'aaabbb' RLIKE 'aa+b+' AND 'aaabbb' NOT RLIKE 'a++c+' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
|
||||
|
||||
emp_no:i
|
||||
---------------
|
||||
10079
|
||||
10078
|
||||
10077
|
||||
10076
|
||||
10075
|
||||
;
|
||||
|
||||
whereInlineRLikeAndCount_1
|
||||
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE '.....?.?' AND 'aaabbb' NOT RLIKE 'aa?bb?' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
|
||||
|
||||
COUNT(*):l | t:i
|
||||
---------------+---------------
|
||||
99 |10000
|
||||
1 |10100
|
||||
;
|
||||
|
||||
whereInlineRLikeAndCount_2
|
||||
SELECT COUNT(*), TRUNCATE(emp_no, -2) t FROM test_emp WHERE 'aaabbb' RLIKE 'a{2,}b{2,}' AND 'aaabbb' NOT RLIKE 'a{4,6}b{4,6}' GROUP BY TRUNCATE(emp_no, -2) ORDER BY t ASC;
|
||||
|
||||
COUNT(*):l | t:i
|
||||
---------------+---------------
|
||||
99 |10000
|
||||
1 |10100
|
||||
;
|
|
@ -51,6 +51,8 @@ whereFieldWithLikeMatch
|
|||
SELECT last_name l FROM "test_emp" WHERE emp_no < 10003 AND last_name LIKE 'K%';
|
||||
whereFieldWithNotLikeMatch
|
||||
SELECT last_name l FROM "test_emp" WHERE emp_no < 10020 AND first_name NOT LIKE 'Ma%';
|
||||
whereFieldWithInlineLikeMatch
|
||||
SELECT emp_no FROM "test_emp" WHERE 'aaabbb' LIKE 'aa%b%' AND 'aaabbb' NOT LIKE 'a%%c%' AND emp_no < 10080 ORDER BY emp_no DESC LIMIT 5;
|
||||
|
||||
whereFieldWithOrderNot
|
||||
SELECT last_name l FROM "test_emp" WHERE NOT emp_no < 10003 ORDER BY emp_no LIMIT 5;
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.elasticsearch.xpack.sql.expression.function.UnresolvedFunction;
|
|||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
|
||||
import org.elasticsearch.xpack.sql.expression.function.scalar.Cast;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.ArithmeticOperation;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.regex.Like;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexMatch;
|
||||
import org.elasticsearch.xpack.sql.plan.TableIdentifier;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.Aggregate;
|
||||
import org.elasticsearch.xpack.sql.plan.logical.EsRelation;
|
||||
|
@ -852,8 +852,8 @@ public class Analyzer extends RuleExecutor<LogicalPlan> {
|
|||
// TODO: we should move to always compare the functions directly
|
||||
// Special check for COUNT: an already seen COUNT function will be returned only if its DISTINCT property
|
||||
// matches the one from the unresolved function to be checked.
|
||||
// Same for LIKE: the equals function also compares the pattern of LIKE
|
||||
if (seenFunction instanceof Count || seenFunction instanceof Like) {
|
||||
// Same for LIKE/RLIKE: the equals function also compares the pattern of LIKE/RLIKE
|
||||
if (seenFunction instanceof Count || seenFunction instanceof RegexMatch) {
|
||||
if (seenFunction.equals(f)){
|
||||
return seenFunction;
|
||||
}
|
||||
|
|
|
@ -6,41 +6,35 @@
|
|||
package org.elasticsearch.xpack.sql.expression.predicate.regex;
|
||||
|
||||
import org.elasticsearch.xpack.sql.expression.Expression;
|
||||
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
|
||||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class Like extends RegexMatch {
|
||||
|
||||
private final LikePattern pattern;
|
||||
public class Like extends RegexMatch<LikePattern> {
|
||||
|
||||
public Like(Source source, Expression left, LikePattern pattern) {
|
||||
super(source, left, pattern.asJavaRegex());
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
public LikePattern pattern() {
|
||||
return pattern;
|
||||
super(source, left, pattern);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NodeInfo<Like> info() {
|
||||
return NodeInfo.create(this, Like::new, field(), pattern);
|
||||
return NodeInfo.create(this, Like::new, field(), pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Like replaceChild(Expression newLeft) {
|
||||
return new Like(source(), newLeft, pattern);
|
||||
return new Like(source(), newLeft, pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return super.equals(obj) && Objects.equals(((Like) obj).pattern(), pattern());
|
||||
public Boolean fold() {
|
||||
Object val = field().fold();
|
||||
return RegexOperation.match(val, pattern().asJavaRegex());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), pattern());
|
||||
protected Processor makeProcessor() {
|
||||
return new RegexProcessor(pattern().asJavaRegex());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,29 +6,35 @@
|
|||
package org.elasticsearch.xpack.sql.expression.predicate.regex;
|
||||
|
||||
import org.elasticsearch.xpack.sql.expression.Expression;
|
||||
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
|
||||
public class RLike extends RegexMatch {
|
||||
public class RLike extends RegexMatch<String> {
|
||||
|
||||
private final String pattern;
|
||||
|
||||
public RLike(Source source, Expression left, String pattern) {
|
||||
super(source, left, pattern);
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
public String pattern() {
|
||||
return pattern;
|
||||
public RLike(Source source, Expression value, String pattern) {
|
||||
super(source, value, pattern);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected NodeInfo<RLike> info() {
|
||||
return NodeInfo.create(this, RLike::new, field(), pattern);
|
||||
return NodeInfo.create(this, RLike::new, field(), pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected RLike replaceChild(Expression newChild) {
|
||||
return new RLike(source(), newChild, pattern);
|
||||
return new RLike(source(), newChild, pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean fold() {
|
||||
Object val = field().fold();
|
||||
return RegexOperation.match(val, pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Processor makeProcessor() {
|
||||
return new RegexProcessor(pattern());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,21 +10,25 @@ import org.elasticsearch.xpack.sql.expression.Expression;
|
|||
import org.elasticsearch.xpack.sql.expression.Expressions;
|
||||
import org.elasticsearch.xpack.sql.expression.Nullability;
|
||||
import org.elasticsearch.xpack.sql.expression.function.scalar.UnaryScalarFunction;
|
||||
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
|
||||
import org.elasticsearch.xpack.sql.expression.predicate.regex.RegexProcessor.RegexOperation;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isStringAndExact;
|
||||
|
||||
public abstract class RegexMatch extends UnaryScalarFunction {
|
||||
public abstract class RegexMatch<T> extends UnaryScalarFunction {
|
||||
|
||||
private final String pattern;
|
||||
|
||||
protected RegexMatch(Source source, Expression value, String pattern) {
|
||||
private final T pattern;
|
||||
|
||||
protected RegexMatch(Source source, Expression value, T pattern) {
|
||||
super(source, value);
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
public T pattern() {
|
||||
return pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DataType dataType() {
|
||||
|
@ -33,7 +37,7 @@ public abstract class RegexMatch extends UnaryScalarFunction {
|
|||
|
||||
@Override
|
||||
public Nullability nullable() {
|
||||
if (pattern == null) {
|
||||
if (pattern() == null) {
|
||||
return Nullability.TRUE;
|
||||
}
|
||||
return field().nullable();
|
||||
|
@ -49,15 +53,14 @@ public abstract class RegexMatch extends UnaryScalarFunction {
|
|||
// right() is not directly foldable in any context but Like can fold it.
|
||||
return field().foldable();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Boolean fold() {
|
||||
Object val = field().fold();
|
||||
return RegexOperation.match(val, pattern);
|
||||
public boolean equals(Object obj) {
|
||||
return super.equals(obj) && Objects.equals(((RegexMatch<?>) obj).pattern(), pattern());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Processor makeProcessor() {
|
||||
return new RegexProcessor(pattern);
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), pattern());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -472,6 +472,7 @@ final class QueryTranslator {
|
|||
|
||||
// TODO: need to optimize on ngram
|
||||
// TODO: see whether escaping is needed
|
||||
@SuppressWarnings("rawtypes")
|
||||
static class Likes extends ExpressionTranslator<RegexMatch> {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -228,6 +228,41 @@ public class QueryTranslatorTests extends ESTestCase {
|
|||
assertEquals(1, rqsq.fields().size());
|
||||
assertEquals("keyword", rqsq.fields().keySet().iterator().next());
|
||||
}
|
||||
|
||||
public void testRLikePatterns() {
|
||||
String[] patterns = new String[] {"(...)+", "abab(ab)?", "(ab){1,2}", "(ab){3}", "aabb|bbaa", "a+b+|b+a+", "aa(cc|bb)",
|
||||
"a{4,6}b{4,6}", ".{3}.{3}", "aaa*bbb*", "a+.+", "a.c.e", "[^abc\\-]"};
|
||||
for (int i = 0; i < 5; i++) {
|
||||
assertDifferentRLikeAndNotRLikePatterns(randomFrom(patterns), randomFrom(patterns));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertDifferentRLikeAndNotRLikePatterns(String firstPattern, String secondPattern) {
|
||||
LogicalPlan p = plan("SELECT keyword k FROM test WHERE k RLIKE '" + firstPattern + "' AND k NOT RLIKE '" + secondPattern + "'");
|
||||
assertTrue(p instanceof Project);
|
||||
p = ((Project) p).child();
|
||||
assertTrue(p instanceof Filter);
|
||||
|
||||
Expression condition = ((Filter) p).condition();
|
||||
QueryTranslation qt = QueryTranslator.toQuery(condition, false);
|
||||
assertEquals(BoolQuery.class, qt.query.getClass());
|
||||
BoolQuery bq = ((BoolQuery) qt.query);
|
||||
assertTrue(bq.isAnd());
|
||||
assertTrue(bq.left() instanceof QueryStringQuery);
|
||||
assertTrue(bq.right() instanceof NotQuery);
|
||||
|
||||
NotQuery nq = (NotQuery) bq.right();
|
||||
assertTrue(nq.child() instanceof QueryStringQuery);
|
||||
QueryStringQuery lqsq = (QueryStringQuery) bq.left();
|
||||
QueryStringQuery rqsq = (QueryStringQuery) nq.child();
|
||||
|
||||
assertEquals("/" + firstPattern + "/", lqsq.query());
|
||||
assertEquals(1, lqsq.fields().size());
|
||||
assertEquals("keyword", lqsq.fields().keySet().iterator().next());
|
||||
assertEquals("/" + secondPattern + "/", rqsq.query());
|
||||
assertEquals(1, rqsq.fields().size());
|
||||
assertEquals("keyword", rqsq.fields().keySet().iterator().next());
|
||||
}
|
||||
|
||||
public void testTranslateNotExpression_WhereClause_Painless() {
|
||||
LogicalPlan p = plan("SELECT * FROM test WHERE NOT(POSITION('x', keyword) = 0)");
|
||||
|
|
Loading…
Reference in New Issue