SQL: Implement null handling for `IN(v1, v2, ...)` (#34750)
Implemented null handling for both the value tested but also for values inside the list of values tested against. The null handling is implemented for local processors, painless scripts and Lucene Terms queries making it available for `IN` expressions occuring in `SELECT`, `WHERE` and `HAVING` clauses. Closes: #34582
This commit is contained in:
parent
d5ad3de42e
commit
4c73854da7
|
@ -233,12 +233,9 @@ public enum DataType {
|
|||
public boolean isCompatibleWith(DataType other) {
|
||||
if (this == other) {
|
||||
return true;
|
||||
} else if (isString() && other.isString()) {
|
||||
return true;
|
||||
} else if (isNumeric() && other.isNumeric()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else return
|
||||
(this == NULL || other == NULL) ||
|
||||
(isString() && other.isString()) ||
|
||||
(isNumeric() && other.isNumeric());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ public abstract class Foldables {
|
|||
public static <T> List<T> valuesOf(List<Expression> list, DataType to) {
|
||||
List<T> l = new ArrayList<>(list.size());
|
||||
for (Expression e : list) {
|
||||
l.add(valueOf(e, to));
|
||||
l.add(valueOf(e, to));
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
|
|
@ -82,16 +82,18 @@ public class In extends NamedExpression implements ScriptWeaver {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object fold() {
|
||||
public Boolean fold() {
|
||||
Object foldedLeftValue = value.fold();
|
||||
|
||||
Boolean result = false;
|
||||
for (Expression rightValue : list) {
|
||||
Boolean compResult = Comparisons.eq(foldedLeftValue, rightValue.fold());
|
||||
if (compResult != null && compResult) {
|
||||
if (compResult == null) {
|
||||
result = null;
|
||||
} else if (compResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -118,15 +120,18 @@ public class In extends NamedExpression implements ScriptWeaver {
|
|||
String scriptPrefix = leftScript + "==";
|
||||
LinkedHashSet<Object> values = list.stream().map(Expression::fold).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
for (Object valueFromList : values) {
|
||||
if (valueFromList instanceof Expression) {
|
||||
ScriptTemplate rightScript = asScript((Expression) valueFromList);
|
||||
sj.add(scriptPrefix + rightScript.template());
|
||||
rightParams.add(rightScript.params());
|
||||
} else {
|
||||
if (valueFromList instanceof String) {
|
||||
sj.add(scriptPrefix + '"' + valueFromList + '"');
|
||||
// if checked against null => false
|
||||
if (valueFromList != null) {
|
||||
if (valueFromList instanceof Expression) {
|
||||
ScriptTemplate rightScript = asScript((Expression) valueFromList);
|
||||
sj.add(scriptPrefix + rightScript.template());
|
||||
rightParams.add(rightScript.params());
|
||||
} else {
|
||||
sj.add(scriptPrefix + valueFromList.toString());
|
||||
if (valueFromList instanceof String) {
|
||||
sj.add(scriptPrefix + '"' + valueFromList + '"');
|
||||
} else {
|
||||
sj.add(scriptPrefix + valueFromList.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,14 +40,17 @@ public class InProcessor implements Processor {
|
|||
@Override
|
||||
public Object process(Object input) {
|
||||
Object leftValue = processsors.get(processsors.size() - 1).process(input);
|
||||
Boolean result = false;
|
||||
|
||||
for (int i = 0; i < processsors.size() - 1; i++) {
|
||||
Boolean compResult = Comparisons.eq(leftValue, processsors.get(i).process(input));
|
||||
if (compResult != null && compResult) {
|
||||
if (compResult == null) {
|
||||
result = null;
|
||||
} else if (compResult) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -9,6 +9,7 @@ import org.elasticsearch.index.query.QueryBuilder;
|
|||
import org.elasticsearch.xpack.sql.expression.Expression;
|
||||
import org.elasticsearch.xpack.sql.expression.Foldables;
|
||||
import org.elasticsearch.xpack.sql.tree.Location;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
|
@ -24,7 +25,9 @@ public class TermsQuery extends LeafQuery {
|
|||
public TermsQuery(Location location, String term, List<Expression> values) {
|
||||
super(location);
|
||||
this.term = term;
|
||||
values.removeIf(e -> e.dataType() == DataType.NULL);
|
||||
this.values = new LinkedHashSet<>(Foldables.valuesOf(values, values.get(0).dataType()));
|
||||
this.values.removeIf(Objects::isNull);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,6 +22,7 @@ public class InProcessorTests extends AbstractWireSerializingTestCase<InProcesso
|
|||
private static final Literal ONE = L(1);
|
||||
private static final Literal TWO = L(2);
|
||||
private static final Literal THREE = L(3);
|
||||
private static final Literal NULL = L(null);
|
||||
|
||||
public static InProcessor randomProcessor() {
|
||||
return new InProcessor(Arrays.asList(new ConstantProcessor(randomLong()), new ConstantProcessor(randomLong())));
|
||||
|
@ -47,6 +48,16 @@ public class InProcessorTests extends AbstractWireSerializingTestCase<InProcesso
|
|||
assertEquals(false, new In(EMPTY, THREE, Arrays.asList(ONE, TWO)).makePipe().asProcessor().process(null));
|
||||
}
|
||||
|
||||
public void testHandleNullOnLeftValue() {
|
||||
assertNull(new In(EMPTY, NULL, Arrays.asList(ONE, TWO, THREE)).makePipe().asProcessor().process(null));
|
||||
assertNull(new In(EMPTY, NULL, Arrays.asList(ONE, NULL, TWO)).makePipe().asProcessor().process(null));
|
||||
}
|
||||
|
||||
public void testHandleNullOnRightValue() {
|
||||
assertEquals(true, new In(EMPTY, THREE, Arrays.asList(ONE, NULL, THREE)).makePipe().asProcessor().process(null));
|
||||
assertNull(new In(EMPTY, TWO, Arrays.asList(ONE, NULL, THREE)).makePipe().asProcessor().process(null));
|
||||
}
|
||||
|
||||
private static Literal L(Object value) {
|
||||
return Literal.of(EMPTY, value);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.sql.expression.predicate;
|
||||
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.xpack.sql.expression.Literal;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.elasticsearch.xpack.sql.tree.Location.EMPTY;
|
||||
|
||||
public class InTests extends ESTestCase {
|
||||
|
||||
private static final Literal ONE = L(1);
|
||||
private static final Literal TWO = L(2);
|
||||
private static final Literal THREE = L(3);
|
||||
private static final Literal NULL = L(null);
|
||||
|
||||
public void testInWithContainedValue() {
|
||||
In in = new In(EMPTY, TWO, Arrays.asList(ONE, TWO, THREE));
|
||||
assertTrue(in.fold());
|
||||
}
|
||||
|
||||
public void testInWithNotContainedValue() {
|
||||
In in = new In(EMPTY, THREE, Arrays.asList(ONE, TWO));
|
||||
assertFalse(in.fold());
|
||||
}
|
||||
|
||||
public void testHandleNullOnLeftValue() {
|
||||
In in = new In(EMPTY, NULL, Arrays.asList(ONE, TWO, THREE));
|
||||
assertNull(in.fold());
|
||||
in = new In(EMPTY, NULL, Arrays.asList(ONE, NULL, THREE));
|
||||
assertNull(in.fold());
|
||||
|
||||
}
|
||||
|
||||
public void testHandleNullsOnRightValue() {
|
||||
In in = new In(EMPTY, THREE, Arrays.asList(ONE, NULL, THREE));
|
||||
assertTrue(in.fold());
|
||||
in = new In(EMPTY, ONE, Arrays.asList(TWO, NULL, THREE));
|
||||
assertNull(in.fold());
|
||||
}
|
||||
|
||||
private static Literal L(Object value) {
|
||||
return Literal.of(EMPTY, value);
|
||||
}
|
||||
}
|
|
@ -173,6 +173,19 @@ public class QueryTranslatorTests extends AbstractBuilderTestCase {
|
|||
assertEquals("keyword:(bar foo lala)", tq.asBuilder().toQuery(createShardContext()).toString());
|
||||
}
|
||||
|
||||
public void testTranslateInExpression_WhereClauseAndNullHAndling() throws IOException {
|
||||
LogicalPlan p = plan("SELECT * FROM test WHERE keyword IN ('foo', null, 'lala', null, 'foo', concat('la', 'la'))");
|
||||
assertTrue(p instanceof Project);
|
||||
assertTrue(p.children().get(0) instanceof Filter);
|
||||
Expression condition = ((Filter) p.children().get(0)).condition();
|
||||
assertFalse(condition.foldable());
|
||||
QueryTranslation translation = QueryTranslator.toQuery(condition, false);
|
||||
Query query = translation.query;
|
||||
assertTrue(query instanceof TermsQuery);
|
||||
TermsQuery tq = (TermsQuery) query;
|
||||
assertEquals("keyword:(foo lala)", tq.asBuilder().toQuery(createShardContext()).toString());
|
||||
}
|
||||
|
||||
public void testTranslateInExpressionInvalidValues_WhereClause() {
|
||||
LogicalPlan p = plan("SELECT * FROM test WHERE keyword IN ('foo', 'bar', keyword)");
|
||||
assertTrue(p instanceof Project);
|
||||
|
@ -196,4 +209,17 @@ public class QueryTranslatorTests extends AbstractBuilderTestCase {
|
|||
assertEquals("InternalSqlScriptUtils.nullSafeFilter(params.a0==10 || params.a0==20)", sq.script().toString());
|
||||
assertThat(sq.script().params().toString(), startsWith("[{a=MAX(int){a->"));
|
||||
}
|
||||
|
||||
public void testTranslateInExpression_HavingClauseAndNullHandling_Painless() {
|
||||
LogicalPlan p = plan("SELECT keyword, max(int) FROM test GROUP BY keyword HAVING max(int) in (10, null, 20, null, 30 - 10)");
|
||||
assertTrue(p instanceof Project);
|
||||
assertTrue(p.children().get(0) instanceof Filter);
|
||||
Expression condition = ((Filter) p.children().get(0)).condition();
|
||||
assertFalse(condition.foldable());
|
||||
QueryTranslation translation = QueryTranslator.toQuery(condition, false);
|
||||
assertTrue(translation.query instanceof ScriptQuery);
|
||||
ScriptQuery sq = (ScriptQuery) translation.query;
|
||||
assertEquals("InternalSqlScriptUtils.nullSafeFilter(params.a0==10 || params.a0==20)", sq.script().toString());
|
||||
assertThat(sq.script().params().toString(), startsWith("[{a=MAX(int){a->"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -450,3 +450,9 @@ selectHireDateGroupByHireDate
|
|||
SELECT hire_date HD, COUNT(*) c FROM test_emp GROUP BY hire_date ORDER BY hire_date DESC;
|
||||
selectSalaryGroupBySalary
|
||||
SELECT salary, COUNT(*) c FROM test_emp GROUP BY salary ORDER BY salary DESC;
|
||||
|
||||
// filter with IN
|
||||
aggMultiWithHavingUsingInAndNullHandling
|
||||
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING max IN(74999, null, 74600) ORDER BY gender;
|
||||
aggMultiGroupByMultiWithHavingUsingInAndNullHandling
|
||||
SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING max IN (74500, null, 74600) ORDER BY gender, languages;
|
||||
|
|
|
@ -96,3 +96,8 @@ SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, 10001, 10002, 999
|
|||
|
||||
whereWithInAndComplexFunctions
|
||||
SELECT last_name l FROM "test_emp" WHERE emp_no NOT IN (10000, abs(2 - 10003), 10002, 999) AND lcase(first_name) IN ('sumant', 'mary', 'patricio', 'No''Match') ORDER BY emp_no LIMIT 5;
|
||||
|
||||
whereWithInAndNullHandling1
|
||||
SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no;
|
||||
whereWithInAndNullHandling2
|
||||
SELECT last_name l FROM "test_emp" WHERE birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) AND (emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040) ORDER BY emp_no;
|
||||
|
|
|
@ -25,6 +25,22 @@ false |true
|
|||
;
|
||||
|
||||
|
||||
inWithNullHandling
|
||||
SELECT 2 IN (1, null, 3), 3 IN (1, null, 3), null IN (1, null, 3), null IN (1, 2, 3);
|
||||
|
||||
2 IN (1, null, 3) | 3 IN (1, null, 3) | null IN (1, null, 3) | null IN (1, 2, 3)
|
||||
--------------------+--------------------+-----------------------+-------------------
|
||||
null |true |null | null
|
||||
;
|
||||
|
||||
inWithNullHandlingAndNegation
|
||||
SELECT NOT 2 IN (1, null, 3), NOT 3 IN (1, null, 3), NOT null IN (1, null, 3), NOT null IN (1, 2, 3);
|
||||
|
||||
NOT 2 IN (1, null, 3) | NOT 3 IN (1, null, 3) | NOT null IN (1, null, 3) | null IN (1, 2, 3)
|
||||
------------------------+------------------------+---------------------------+--------------------
|
||||
null |false |null | null
|
||||
;
|
||||
|
||||
//
|
||||
// SELECT with IN and table columns
|
||||
//
|
||||
|
@ -64,4 +80,23 @@ SELECT 1 IN (1, abs(2 - 4), 3) OR emp_no NOT IN (10000, 10000 + 1, 10002) FROM t
|
|||
10003
|
||||
10004
|
||||
10005
|
||||
;
|
||||
;
|
||||
|
||||
inWithTableColumnAndNullHandling
|
||||
SELECT emp_no, birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1;
|
||||
|
||||
emp_no | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP))
|
||||
--------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------
|
||||
10038 | true | true
|
||||
10039 | null | null
|
||||
10040 | false | null
|
||||
|
||||
|
||||
inWithTableColumnAndNullHandlingAndNegation
|
||||
SELECT emp_no, NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)), NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) FROM test_emp WHERE emp_no = 10038 OR emp_no = 10039 OR emp_no = 10040 ORDER BY 1;
|
||||
|
||||
emp_no | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), CAST('1959-10-01T00:00:00Z' AS TIMESTAMP)) | NOT birth_date in (CAST('2018-10-01T00:00:00Z' AS TIMESTAMP), null, CAST('1959-10-01T00:00:00Z' AS TIMESTAMP))
|
||||
--------+-----------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------
|
||||
10038 | false | false
|
||||
10039 | null | null
|
||||
10040 | true | null
|
Loading…
Reference in New Issue