From 4c73854da78d9f95e1c9a9aa7ae973e870102ba6 Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Wed, 24 Oct 2018 14:42:40 +0200 Subject: [PATCH] SQL: Implement null handling for `IN(v1, v2, ...)` (#34750) Implemented null handling for both the value tested but also for values inside the list of values tested against. The null handling is implemented for local processors, painless scripts and Lucene Terms queries making it available for `IN` expressions occuring in `SELECT`, `WHERE` and `HAVING` clauses. Closes: #34582 --- .../xpack/sql/type/DataType.java | 11 ++-- .../xpack/sql/expression/Foldables.java | 2 +- .../xpack/sql/expression/predicate/In.java | 29 ++++++----- .../operator/comparison/InProcessor.java | 7 ++- .../xpack/sql/querydsl/query/TermsQuery.java | 3 ++ .../predicate/InProcessorTests.java | 11 ++++ .../sql/expression/predicate/InTests.java | 50 +++++++++++++++++++ .../sql/planner/QueryTranslatorTests.java | 26 ++++++++++ x-pack/qa/sql/src/main/resources/agg.sql-spec | 6 +++ .../qa/sql/src/main/resources/filter.sql-spec | 5 ++ .../qa/sql/src/main/resources/select.csv-spec | 37 +++++++++++++- 11 files changed, 164 insertions(+), 23 deletions(-) create mode 100644 x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InTests.java diff --git a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java index 1c9cf6ac925..88b952b87ac 100644 --- a/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java +++ b/x-pack/plugin/sql/sql-proto/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java @@ -233,12 +233,9 @@ public enum DataType { public boolean isCompatibleWith(DataType other) { if (this == other) { return true; - } else if (isString() && other.isString()) { - return true; - } else if (isNumeric() && other.isNumeric()) { - return true; - } else { - return false; - } + } else return + (this == NULL || other == NULL) || + (isString() && other.isString()) || + (isNumeric() && other.isNumeric()); } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java index 6e06a1d1c85..8c672ed162e 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java @@ -48,7 +48,7 @@ public abstract class Foldables { public static List valuesOf(List list, DataType to) { List l = new ArrayList<>(list.size()); for (Expression e : list) { - l.add(valueOf(e, to)); + l.add(valueOf(e, to)); } return l; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java index a820833d1a0..1574e406a1e 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/In.java @@ -82,16 +82,18 @@ public class In extends NamedExpression implements ScriptWeaver { } @Override - public Object fold() { + public Boolean fold() { Object foldedLeftValue = value.fold(); - + Boolean result = false; for (Expression rightValue : list) { Boolean compResult = Comparisons.eq(foldedLeftValue, rightValue.fold()); - if (compResult != null && compResult) { + if (compResult == null) { + result = null; + } else if (compResult) { return true; } } - return false; + return result; } @Override @@ -118,15 +120,18 @@ public class In extends NamedExpression implements ScriptWeaver { String scriptPrefix = leftScript + "=="; LinkedHashSet values = list.stream().map(Expression::fold).collect(Collectors.toCollection(LinkedHashSet::new)); for (Object valueFromList : values) { - if (valueFromList instanceof Expression) { - ScriptTemplate rightScript = asScript((Expression) valueFromList); - sj.add(scriptPrefix + rightScript.template()); - rightParams.add(rightScript.params()); - } else { - if (valueFromList instanceof String) { - sj.add(scriptPrefix + '"' + valueFromList + '"'); + // if checked against null => false + if (valueFromList != null) { + if (valueFromList instanceof Expression) { + ScriptTemplate rightScript = asScript((Expression) valueFromList); + sj.add(scriptPrefix + rightScript.template()); + rightParams.add(rightScript.params()); } else { - sj.add(scriptPrefix + valueFromList.toString()); + if (valueFromList instanceof String) { + sj.add(scriptPrefix + '"' + valueFromList + '"'); + } else { + sj.add(scriptPrefix + valueFromList.toString()); + } } } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java index 5ebf8870965..0a901b5b5e6 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/predicate/operator/comparison/InProcessor.java @@ -40,14 +40,17 @@ public class InProcessor implements Processor { @Override public Object process(Object input) { Object leftValue = processsors.get(processsors.size() - 1).process(input); + Boolean result = false; for (int i = 0; i < processsors.size() - 1; i++) { Boolean compResult = Comparisons.eq(leftValue, processsors.get(i).process(input)); - if (compResult != null && compResult) { + if (compResult == null) { + result = null; + } else if (compResult) { return true; } } - return false; + return result; } @Override diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java index 412df4e8ca6..4366e2d404c 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/query/TermsQuery.java @@ -9,6 +9,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.sql.expression.Expression; import org.elasticsearch.xpack.sql.expression.Foldables; import org.elasticsearch.xpack.sql.tree.Location; +import org.elasticsearch.xpack.sql.type.DataType; import java.util.LinkedHashSet; import java.util.List; @@ -24,7 +25,9 @@ public class TermsQuery extends LeafQuery { public TermsQuery(Location location, String term, List values) { super(location); this.term = term; + values.removeIf(e -> e.dataType() == DataType.NULL); this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType())); + this.values.removeIf(Objects::isNull); } @Override diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java index 3e71ac90f81..12bba003115 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/expression/predicate/InProcessorTests.java @@ -22,6 +22,7 @@ public class InProcessorTests extends AbstractWireSerializingTestCase")); } + + public void testTranslateInExpression_HavingClauseAndNullHandling_Painless() { + LogicalPlan p = plan("SELECT keyword, max(int) FROM test GROUP BY keyword HAVING max(int) in (10, null, 20, null, 30 - 10)"); + assertTrue(p instanceof Project); + assertTrue(p.children().get(0) instanceof Filter); + Expression condition = ((Filter) p.children().get(0)).condition(); + assertFalse(condition.foldable()); + QueryTranslation translation = QueryTranslator.toQuery(condition, false); + assertTrue(translation.query instanceof ScriptQuery); + ScriptQuery sq = (ScriptQuery) translation.query; + assertEquals("InternalSqlScriptUtils.nullSafeFilter(params.a0==10 || params.a0==20)", sq.script().toString()); + assertThat(sq.script().params().toString(), startsWith("[{a=MAX(int){a->")); + } } diff --git a/x-pack/qa/sql/src/main/resources/agg.sql-spec b/x-pack/qa/sql/src/main/resources/agg.sql-spec index 2c6248059f5..110882dc21e 100644 --- a/x-pack/qa/sql/src/main/resources/agg.sql-spec +++ b/x-pack/qa/sql/src/main/resources/agg.sql-spec @@ -450,3 +450,9 @@ selectHireDateGroupByHireDate SELECT hire_date HD, COUNT(*) c FROM test_emp GROUP BY hire_date ORDER BY hire_date DESC; selectSalaryGroupBySalary SELECT salary, COUNT(*) c FROM test_emp GROUP BY salary ORDER BY salary DESC; + +// filter with IN +aggMultiWithHavingUsingInAndNullHandling +SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING max IN(74999, null, 74600) ORDER BY gender; +aggMultiGroupByMultiWithHavingUsingInAndNullHandling +SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING max IN (74500, null, 74600) ORDER BY gender, languages; diff --git a/x-pack/qa/sql/src/main/resources/filter.sql-spec b/x-pack/qa/sql/src/main/resources/filter.sql-spec index 1a564ecb9ad..79b3836b959 100644 --- a/x-pack/qa/sql/src/main/resources/filter.sql-spec +++ b/x-pack/qa/sql/src/main/resources/filter.sql-spec @@ -96,3 +96,8 @@ SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, 10001, 10002, 999 whereWithInAndComplexFunctions SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, abs(2 - 10003), 10002, 999) AND lcase(first_name) IN ('sumant', 'mary', 'patricio', 'No''Match') ORDER BY emp_no LIMIT 5; + +whereWithInAndNullHandling1 +SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no; +whereWithInAndNullHandling2 +SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no; diff --git a/x-pack/qa/sql/src/main/resources/select.csv-spec b/x-pack/qa/sql/src/main/resources/select.csv-spec index b3888abd47b..bf208c62026 100644 --- a/x-pack/qa/sql/src/main/resources/select.csv-spec +++ b/x-pack/qa/sql/src/main/resources/select.csv-spec @@ -25,6 +25,22 @@ false |true ; +inWithNullHandling +SELECT 2 IN (1, null, 3), 3 IN (1, null, 3), null IN (1, null, 3), null IN (1, 2, 3); + + 2 IN (1, null, 3) | 3 IN (1, null, 3) | null IN (1, null, 3) | null IN (1, 2, 3) +--------------------+--------------------+-----------------------+------------------- +null |true |null | null +; + +inWithNullHandlingAndNegation +SELECT NOT 2 IN (1, null, 3), NOT 3 IN (1, null, 3), NOT null IN (1, null, 3), NOT null IN (1, 2, 3); + + NOT 2 IN (1, null, 3) | NOT 3 IN (1, null, 3) | NOT null IN (1, null, 3) | null IN (1, 2, 3) +------------------------+------------------------+---------------------------+-------------------- +null |false |null | null +; + // // SELECT with IN and table columns // @@ -64,4 +80,23 @@ SELECT 1 IN (1, abs(2 - 4), 3) OR emp_no NOT IN (10000, 10000 + 1, 10002) FROM t 10003 10004 10005 -; \ No newline at end of file +; + +inWithTableColumnAndNullHandling +SELECT emp_no, birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1; + + emp_no | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) +--------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------ +10038 | true | true +10039 | null | null +10040 | false | null + + +inWithTableColumnAndNullHandlingAndNegation +SELECT emp_no, NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1; + + emp_no | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) +--------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------ +10038 | false | false +10039 | null | null +10040 | true | null \ No newline at end of file