SQL: handle MIN and MAX functions on dates in Painless scripts (#57605) (#57863)

* Convert the result of numeric aggregations (MIN, MAX) to date/datetime
in Painless scripts

(cherry picked from commit f1de99e2a6fbf3806c4f2b6b809738aa8faa2d75)
This commit is contained in:
Andrei Stefan 2020-06-09 10:09:01 +03:00 committed by GitHub
parent 9eaee3da8d
commit 3cc8166946
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 130 additions and 4 deletions

View File

@ -14,6 +14,7 @@ import org.elasticsearch.xpack.ql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
import org.elasticsearch.xpack.ql.expression.gen.script.Scripts;
import org.elasticsearch.xpack.ql.tree.Source;
import org.elasticsearch.xpack.ql.type.DataTypes;
import org.elasticsearch.xpack.ql.util.DateUtils;
import java.time.OffsetTime;
@ -108,19 +109,30 @@ public abstract class ScalarFunction extends Function {
}
/**
 * Builds the {@link ScriptTemplate} used when this function is applied to an aggregate function.
 * The template text is passed through {@code processScript}, the aggregate is handed to the
 * params builder via {@code agg(...)}, and the resulting template is typed with {@code dataType()}.
 *
 * @param aggregate the aggregate function this scalar function operates on
 * @return the script template referencing the aggregate
 */
protected ScriptTemplate scriptWithAggregate(AggregateFunction aggregate) {
// NOTE(review): `template` is declared twice in a row. This looks like a diff view that lost its
// -/+ markers (old line first, its replacement second) - confirm against the real file.
String template = "{}";
// basicTemplate chooses between the plain "{}" placeholder and a "{sql}.asDateTime({})" wrapper
// depending on the data type of the function passed in.
String template = basicTemplate(aggregate);
return new ScriptTemplate(processScript(template),
paramsBuilder().agg(aggregate).build(),
dataType());
}
// This method isn't actually used at the moment, since there is no grouping function (i.e. HISTOGRAM)
// that currently results in a script being generated.
/**
 * Builds the {@link ScriptTemplate} used when this function is applied to a grouping function.
 * The template text is passed through {@code processScript}, the grouping is handed to the
 * params builder via {@code grouping(...)}, and the resulting template is typed with {@code dataType()}.
 *
 * @param grouping the grouping function this scalar function operates on
 * @return the script template referencing the grouping
 */
protected ScriptTemplate scriptWithGrouping(GroupingFunction grouping) {
// NOTE(review): `template` is declared twice in a row. This looks like a diff view that lost its
// -/+ markers (old line first, its replacement second) - confirm against the real file.
String template = "{}";
// basicTemplate chooses between the plain "{}" placeholder and a "{sql}.asDateTime({})" wrapper
// depending on the data type of the function passed in.
String template = basicTemplate(grouping);
return new ScriptTemplate(processScript(template),
paramsBuilder().grouping(grouping).build(),
dataType());
}
// FIXME: this needs to be refactored to account for different datatypes in different projects (ie DATE from SQL)
/**
 * Selects the script template text for the given function based on its data type.
 * A data type named "DATE", or the {@code DataTypes.DATETIME} type, yields a template that wraps
 * the placeholder in an {@code asDateTime} call; any other type yields the bare placeholder.
 *
 * @param function the function whose data type drives the choice
 * @return the template text to feed into {@code processScript}
 */
private String basicTemplate(Function function) {
    // DATE is matched by name here (see the FIXME above); DATETIME is matched by identity.
    boolean dateBased = function.dataType().name().equals("DATE") || function.dataType() == DataTypes.DATETIME;
    return dateBased ? "{sql}.asDateTime({})" : "{}";
}
protected ScriptTemplate scriptWithField(FieldAttribute field) {
return new ScriptTemplate(processScript(Scripts.DOC_VALUE),
paramsBuilder().variable(field.name()).build(),

View File

@ -1188,3 +1188,82 @@ GROUP BY gender ORDER BY gender;
17811.071545718776|1.2151168881502939E11|3.1723426960671306E8|F
15904.093950318531|1.699198993070239E11 |2.529402043805585E8 |M
;
aggWithMinOfDatesAndCastAsDate
schema::g:s|m:date
SELECT gender g, MIN(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() ORDER BY g;
g | m
---------------+------------------------
null |1953-01-23T00:00:00.000Z
F |1952-04-19T00:00:00.000Z
M |1952-02-27T00:00:00.000Z
;
aggWithMinOfDatetime
schema::g:s|m:ts
SELECT gender g, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender;
g | m
---------------+------------------------
null |1953-01-23T00:00:00.000Z
F |1952-04-19T00:00:00.000Z
M |1952-02-27T00:00:00.000Z
;
aggWithMinOfDatetimeAndDate
schema::g:s|mc:date|m:ts
SELECT gender g, MIN(CAST(birth_date AS DATE)) mc, MIN(birth_date) m FROM test_emp GROUP BY gender HAVING MIN(CAST(birth_date AS DATE)) < NOW() AND MIN(birth_date) <= CURRENT_TIMESTAMP() ORDER BY g;
g | mc | m
---------------+------------------------+------------------------
null |1953-01-23T00:00:00.000Z|1953-01-23T00:00:00.000Z
F |1952-04-19T00:00:00.000Z|1952-04-19T00:00:00.000Z
M |1952-02-27T00:00:00.000Z|1952-02-27T00:00:00.000Z
;
aggWithMaxOfDatetime
schema::g:s|m:ts
SELECT gender g, MAX(birth_date) m FROM test_emp GROUP BY gender HAVING m < NOW() ORDER BY gender;
g | m
---------------+------------------------
null |1963-06-07T00:00:00.000Z
F |1964-10-18T00:00:00.000Z
M |1965-01-03T00:00:00.000Z
;
aggWithMaxOfDate
schema::g:s|m:date
SELECT gender g, MAX(CAST(birth_date AS DATE)) m FROM test_emp GROUP BY gender HAVING m < CAST('2020-01-01' AS DATE) ORDER BY gender;
g | m
---------------+------------------------
null |1963-06-07T00:00:00.000Z
F |1964-10-18T00:00:00.000Z
M |1965-01-03T00:00:00.000Z
;
aggWithMinMaxOfDatetime
schema::g:s|mx:ts|mn:ts
SELECT gender g, MAX(birth_date) mx, MIN(birth_date) mn FROM test_emp GROUP BY gender HAVING mn < NOW() AND mx > CAST('1950-01-01' AS DATE) ORDER BY gender;
g | mx | mn
---------------+------------------------+------------------------
null |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z
F |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z
M |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z
;
aggWithMinMaxOfDate
schema::g:s|mx:date|mn:date
SELECT gender g, MAX(CAST(birth_date AS DATE)) mx, MIN(CAST(birth_date AS DATE)) mn FROM test_emp GROUP BY gender HAVING mn < CAST('2020-01-01' AS DATE) OR mx < CAST('1980-01-01T12:00:00' AS DATETIME) ORDER BY gender;
g | mx | mn
---------------+------------------------+------------------------
null |1963-06-07T00:00:00.000Z|1953-01-23T00:00:00.000Z
F |1964-10-18T00:00:00.000Z|1952-04-19T00:00:00.000Z
M |1965-01-03T00:00:00.000Z|1952-02-27T00:00:00.000Z
;

View File

@ -913,8 +913,8 @@ public class QueryTranslatorTests extends ESTestCase {
"\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":" +
"{\"bucket_selector\":{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"" +
"InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(InternalSqlScriptUtils.coalesce(" +
"[params.a0]),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\",\"params\":" +
"{\"v0\":\"2020-01-01T00:00:00.000Z\"}}"));
"[InternalSqlScriptUtils.asDateTime(params.a0)]),InternalSqlScriptUtils.asDateTime(params.v0)))\"," +
"\"lang\":\"painless\",\"params\":{\"v0\":\"2020-01-01T00:00:00.000Z\"}}"));
assertTrue(esQExec.queryContainer().query() instanceof ScriptQuery);
ScriptQuery sq = (ScriptQuery) esQExec.queryContainer().query();
assertEquals("InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt(" +
@ -2125,4 +2125,39 @@ public class QueryTranslatorTests extends ESTestCase {
}
}
}
/**
 * Plans a query with MAX over a datetime field in both SELECT and HAVING and checks that the
 * generated bucket_selector script wraps the aggregation result (params.a0) in asDateTime.
 */
public void testScriptsInsideAggregateFunctions_WithDatetimeField() {
    String sql = "SELECT MAX(date) FROM test HAVING MAX(date) > CAST('2020-05-03T12:34:56.000Z' AS DATETIME)";
    PhysicalPlan plan = optimizeAndPlan(sql);
    assertEquals(EsQueryExec.class, plan.getClass());
    EsQueryExec queryExec = (EsQueryExec) plan;

    // Exactly one metric aggregation and one pipeline (HAVING) aggregation are expected.
    AggregationBuilder aggregations = queryExec.queryContainer().aggs().asAggBuilder();
    assertEquals(1, aggregations.getSubAggregations().size());
    assertEquals(1, aggregations.getPipelineAggregations().size());
    String aggName = aggregations.getSubAggregations().iterator().next().getName();
    String havingName = aggregations.getPipelineAggregations().iterator().next().getName();

    // The generated names are not fixed, so they are spliced into the expected JSON fragment.
    String expectedFragment =
        "\"aggregations\":{\"" + aggName + "\":{\"max\":{\"field\":\"date\"}},\"" + havingName + "\":{\"bucket_selector\":"
        + "{\"buckets_path\":{\"a0\":\"" + aggName + "\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter("
        + "InternalQlScriptUtils.gt(InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\","
        + "\"lang\":\"painless\",\"params\":{\"v0\":\"2020-05-03T12:34:56.000Z\"}},\"gap_policy\":\"skip\"}}}}}}";

    // Whitespace is stripped from the rendered query so the comparison ignores formatting.
    String renderedQuery = queryExec.queryContainer().toString().replaceAll("\\s+", "");
    assertThat(renderedQuery, containsString(expectedFragment));
}
/**
 * Plans a query with MIN and MAX over a field cast to DATE, plus a HAVING on the MIN, and checks
 * that both are served by a single scripted "stats" aggregation whose ".min" path feeds a
 * bucket_selector script that wraps the aggregation result (params.a0) in asDateTime.
 */
public void testScriptsInsideAggregateFunctions_WithDateField_AndExtendedStats() {
    String sql = "SELECT MIN(CAST(date AS DATE)), MAX(CAST(date AS DATE)) FROM test HAVING "
        + "MIN(CAST(date AS DATE)) > CAST('2020-05-03T12:34:56.000Z' AS DATE)";
    PhysicalPlan plan = optimizeAndPlan(sql);
    assertEquals(EsQueryExec.class, plan.getClass());
    EsQueryExec queryExec = (EsQueryExec) plan;

    // Exactly one metric aggregation and one pipeline (HAVING) aggregation are expected.
    AggregationBuilder aggregations = queryExec.queryContainer().aggs().asAggBuilder();
    assertEquals(1, aggregations.getSubAggregations().size());
    assertEquals(1, aggregations.getPipelineAggregations().size());
    String aggName = aggregations.getSubAggregations().iterator().next().getName();
    String havingName = aggregations.getPipelineAggregations().iterator().next().getName();

    // The generated names are not fixed, so they are spliced into the expected JSON fragment.
    String expectedFragment =
        "\"aggregations\":{\"" + aggName + "\":{\"stats\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast("
        + "InternalQlScriptUtils.docValue(doc,params.v0),params.v1)\",\"lang\":\"painless\",\"params\":"
        + "{\"v0\":\"date\",\"v1\":\"DATE\"}}}},\"" + havingName + "\":{\"bucket_selector\":{\"buckets_path\":"
        + "{\"a0\":\"" + aggName + ".min\"},\"script\":{\"source\":\"InternalQlScriptUtils.nullSafeFilter(InternalQlScriptUtils.gt("
        + "InternalSqlScriptUtils.asDateTime(params.a0),InternalSqlScriptUtils.asDateTime(params.v0)))\",\"lang\":\"painless\","
        + "\"params\":{\"v0\":\"2020-05-03T00:00:00.000Z\"}},\"gap_policy\":\"skip\"}}}}}}";

    // Whitespace is stripped from the rendered query so the comparison ignores formatting.
    String renderedQuery = queryExec.queryContainer().toString().replaceAll("\\s+", "");
    assertThat(renderedQuery, containsString(expectedFragment));
}
}