SQL: Fix issue with GROUP BY YEAR() (#49559)

Grouping By YEAR() is translated to a histogram aggregation, but
previously if there was a scalar function invloved (e.g.:
`YEAR(date + INTERVAL 2 YEARS)`), there was no proper script created
and the histogram was applied on a field with name: `date + INTERVAL 2 YEARS`
which doesn't make sense, and resulted in null result.

Check the underlying field of YEAR() and if it's a function call
`asScript()` to properly get the painless script on which the histogram
is applied.

Fixes: #49386
(cherry picked from commit 93c37abc943d00d3a14ba08435d118a6d48874c7)
This commit is contained in:
Marios Trivyzas 2019-11-26 13:23:41 +01:00
parent cf5f013033
commit b0cb7bf229
3 changed files with 53 additions and 4 deletions

View File

@ -534,6 +534,29 @@ SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY
null |10
;
histogramYearOnDateTimeWithScalars
schema::year:i|c:l
SELECT YEAR(CAST(birth_date + INTERVAL 5 YEARS AS DATE) + INTERVAL 20 MONTHS) AS year, COUNT(*) as c FROM test_emp GROUP BY 1;
year | c
---------------+---------------
null |10
1958 |2
1959 |12
1960 |7
1961 |7
1962 |4
1963 |5
1964 |5
1965 |7
1966 |9
1967 |7
1968 |7
1969 |8
1970 |6
1971 |4
;
histogramNumericWithExpression
schema::h:i|c:l
SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;

View File

@ -283,10 +283,20 @@ final class QueryTranslator {
// dates are handled differently because of date histograms
if (exp instanceof DateTimeHistogramFunction) {
DateTimeHistogramFunction dthf = (DateTimeHistogramFunction) exp;
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.fixedInterval(), dthf.zoneId());
Expression field = dthf.field();
if (field instanceof FieldAttribute) {
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, nameOf(field), dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, nameOf(field), dthf.fixedInterval(), dthf.zoneId());
}
} else if (field instanceof Function) {
ScriptTemplate script = ((Function) field).asScript();
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, script, dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, script, dthf.fixedInterval(), dthf.zoneId());
}
}
}
// all other scalar functions become a script

View File

@ -999,6 +999,22 @@ public class QueryTranslatorTests extends ESTestCase {
+ "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
}
public void testGroupByYearAndScalarsQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("YEAR(CAST(date + INTERVAL 5 months AS DATE))", eqe.output().get(0).qualifiedName());
assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" +
"InternalSqlScriptUtils.add(InternalSqlScriptUtils.docValue(doc,params.v0)," +
"InternalSqlScriptUtils.intervalYearMonth(params.v1,params.v2)),params.v3)\"," +
"\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"P5M\",\"v2\":\"INTERVAL_MONTH\"," +
"\"v3\":\"DATE\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
"\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
}
public void testGroupByHistogramWithDate() {
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)");
assertTrue(p instanceof Aggregate);