From 3cc816694687c0204469b8dd26435bf4f2bf645d Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 9 Jun 2020 10:09:01 +0300 Subject: [PATCH] SQL: handle MIN and MAX functions on dates in Painless scripts (#57605) (#57863) * Convert to date/datetime the result of numeric aggregations (min, max) in Painless scripts (cherry picked from commit f1de99e2a6fbf3806c4f2b6b809738aa8faa2d75) --- .../function/scalar/ScalarFunction.java | 16 +++- .../qa/server/src/main/resources/agg.csv-spec | 79 +++++++++++++++++++ .../sql/planner/QueryTranslatorTests.java | 39 ++++++++- 3 files changed, 130 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java index ddf59f8fb1a..7a0157120ae 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java @@ -14,6 +14,7 @@ import org.elasticsearch.xpack.ql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate; import org.elasticsearch.xpack.ql.expression.gen.script.Scripts; import org.elasticsearch.xpack.ql.tree.Source; +import org.elasticsearch.xpack.ql.type.DataTypes; import org.elasticsearch.xpack.ql.util.DateUtils; import java.time.OffsetTime; @@ -108,19 +109,30 @@ public abstract class ScalarFunction extends Function { } protected ScriptTemplate scriptWithAggregate(AggregateFunction aggregate) { - String template = "{}"; + String template = basicTemplate(aggregate); return new ScriptTemplate(processScript(template), paramsBuilder().agg(aggregate).build(), dataType()); } + // This method isn't actually used at the moment, since there is no grouping function (ie HISTOGRAM) + // that currently results in a script being generated protected ScriptTemplate scriptWithGrouping(GroupingFunction grouping) { - String template = "{}"; + String template = basicTemplate(grouping); return new ScriptTemplate(processScript(template), paramsBuilder().grouping(grouping).build(), dataType()); } + // FIXME: this needs to be refactored to account for different datatypes in different projects (ie DATE from SQL) + private String basicTemplate(Function function) { + if (function.dataType().name().equals("DATE") || function.dataType() == DataTypes.DATETIME) { + return "{sql}.asDateTime({})"; + } else { + return "{}"; + } + } + protected ScriptTemplate scriptWithField(FieldAttribute field) { return new ScriptTemplate(processScript(Scripts.DOC_VALUE), paramsBuilder().variable(field.name()).build(), diff --git a/x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec index dbcbbd83943..d4fd02ec782 100644 --- a/x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/server/src/main/resources/agg.csv-spec @@ -1188,3 +1188,82 @@ GROUP BY gender ORDER BY gender; 17811.071545718776|1.2151168881502939E11|3.1723426960671306E8|F 15904.093950318531|1.699198993070239E11 |2.529402043805585E8 |M ; + + +aggWithMinOfDatesAndCastAsDate +schema::g:s|m:date +SELECT gender g, MIN(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() ORDER BY g; + + g | m +---------------+------------------------ +null |1953-01-23T00:00:00.000Z +F |1952-04-19T00:00:00.000Z +M |1952-02-27T00:00:00.000Z +; + +aggWithMinOfDatetime +schema::g:s|m:ts +SELECT gender g, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender; + + g | m +---------------+------------------------ +null |1953-01-23T00:00:00.000Z +F |1952-04-19T00:00:00.000Z +M |1952-02-27T00:00:00.000Z +; + +aggWithMinOfDatetimeAndDate +schema::g:s|mc:date|m:ts +SELECT gender g, MIN(CAST(birth_date AS DATE)) mc, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() AND MIN(birth_date) <= CURRENT_TIMESTAMP() ORDER BY g; + + g | mc | m +---------------+------------------------+------------------------ +null |1953-01-23T00:00:00.000Z|1953-01-23T00:00:00.000Z +F |1952-04-19T00:00:00.000Z|1952-04-19T00:00:00.000Z +M |1952-02-27T00:00:00.000Z|1952-02-27T00:00:00.000Z +; + +aggWithMaxOfDatetime +schema::g:s|m:ts +SELECT gender g, MAX(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender; + + g | m +---------------+------------------------ +null |1963-06-07T00:00:00.000Z +F |1964-10-18T00:00:00.000Z +M |1965-01-03T00:00:00.000Z +; + +aggWithMaxOfDate +schema::g:s|m:date +SELECT gender g, MAX(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING m < CAST('2020-01-01' AS DATE) ORDER BY gender; + + g | m +---------------+------------------------ +null |1963-06-07T00:00:00.000Z +F |1964-10-18T00:00:00.000Z +M |1965-01-03T00:00:00.000Z +; + +aggWithMinMaxOfDatetime +schema::g:s|mx:ts|mn:ts +SELECT gender g, MAX(birth_date) mx, MIN(birth_date) mn FROM test_emp GROUP BY gender HAVING mn < NOW() AND mx > CAST('1950-01-01' AS DATE) ORDER BY gender; + + g | mx | mn +---------------+------------------------+------------------------ +null |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z +F |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z +M |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z +; + +aggWithMinMaxOfDate +schema::g:s|mx:date|mn:date +SELECT gender g, MAX(CAST(birth_date AS DATE)) mx, MIN(CAST(birth_date AS DATE)) mn FROM test_emp GROUP BY gender HAVING mn < CAST('2020-01-01' AS DATE) OR mx < CAST('1980-01-01T12:00:00' AS DATETIME) ORDER BY gender; + + g | mx | mn +---------------+------------------------+------------------------ +null |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z +F |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z +M |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z +; + diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index ee12d2a5ee4..363492d077b 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -913,8 +913,8 @@ public class QueryTranslatorTests extends ESTestCase { "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":" + "{\"bucket_selector\":{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"" + "InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(InternalSqlScriptUtils.coalesce(" + - "[params.a0]),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\",\"params\":" + - "{\"v0\":\"2020-01-01T00:00:00.000Z\"}}")); + "[InternalSqlScriptUtils.asDateTime(params.a0)]),InternalSqlScriptUtils.asDateTime(params.v0)))\"," + + "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-01-01T00:00:00.000Z\"}}")); assertTrue(esQExec.queryContainer().query() instanceof ScriptQuery); ScriptQuery sq = (ScriptQuery) esQExec.queryContainer().query(); assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(" + @@ -2125,4 +2125,39 @@ public class QueryTranslatorTests extends ESTestCase { } } } + + public void testScriptsInsideAggregateFunctions_WithDatetimeField() { + PhysicalPlan p = optimizeAndPlan("SELECT MAX(date) FROM test HAVING MAX(date) > CAST('2020-05-03T12:34:56.000Z' AS DATETIME)"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + AggregationBuilder aggBuilder = eqe.queryContainer().aggs().asAggBuilder(); + assertEquals(1, aggBuilder.getSubAggregations().size()); + assertEquals(1, aggBuilder.getPipelineAggregations().size()); + String aggName = aggBuilder.getSubAggregations().iterator().next().getName(); + String havingName = aggBuilder.getPipelineAggregations().iterator().next().getName(); + assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""), containsString( + "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":{\"bucket_selector\":" + + "{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter(" + + "InternalQlScriptUtils.gt(InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\"," + + "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-05-03T12:34:56.000Z\"}},\"gap_policy\":\"skip\"}}}}}}")); + } + + public void testScriptsInsideAggregateFunctions_WithDateField_AndExtendedStats() { + PhysicalPlan p = optimizeAndPlan("SELECT MIN(CAST(date AS DATE)), MAX(CAST(date AS DATE)) FROM test HAVING " + + "MIN(CAST(date AS DATE)) > CAST('2020-05-03T12:34:56.000Z' AS DATE)"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + AggregationBuilder aggBuilder = eqe.queryContainer().aggs().asAggBuilder(); + assertEquals(1, aggBuilder.getSubAggregations().size()); + assertEquals(1, aggBuilder.getPipelineAggregations().size()); + String aggName = aggBuilder.getSubAggregations().iterator().next().getName(); + String havingName = aggBuilder.getPipelineAggregations().iterator().next().getName(); + assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""), containsString( + "\"aggregations\":{\"" + aggName + "\":{\"stats\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" + + "InternalQlScriptUtils.docValue(doc,params.v0),params.v1)\",\"lang\":\"painless\",\"params\":" + + "{\"v0\":\"date\",\"v1\":\"DATE\"}}}},\"" + havingName + "\":{\"bucket_selector\":{\"buckets_path\":" + + "{\"a0\":\"" + aggName + ".min\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(" + + "InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\"," + + "\"params\":{\"v0\":\"2020-05-03T00:00:00.000Z\"}},\"gap_policy\":\"skip\"}}}}}}")); + } }