diff --git a/docs/reference/sql/functions/grouping.asciidoc b/docs/reference/sql/functions/grouping.asciidoc index 0de9e396fdd..f9b2163c375 100644 --- a/docs/reference/sql/functions/grouping.asciidoc +++ b/docs/reference/sql/functions/grouping.asciidoc @@ -87,8 +87,8 @@ actually used will be `INTERVAL '2' DAY`. If the interval specified is less than [IMPORTANT] All intervals specified for a date/time HISTOGRAM will use a <<search-aggregations-bucket-datehistogram-aggregation,fixed interval>> -in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR` AND `INTERVAL '1' MONTH` where a calendar interval is used. -The choice for a calendar interval was made for having a more intuitive result for YEAR and MONTH groupings. In the case of YEAR, for example, the calendar intervals consider a one year +in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR`, `INTERVAL '1' MONTH` and `INTERVAL '1' DAY` where a calendar interval is used. +The choice for a calendar interval was made for having a more intuitive result for YEAR, MONTH and DAY groupings. In the case of YEAR, for example, the calendar intervals consider a one year bucket as the one starting on January 1st that specific year, whereas a fixed interval one-year-bucket considers one year as a number of milliseconds (for example, `31536000000ms` corresponding to 365 days, 24 hours per day, 60 minutes per hour etc.). 
With fixed intervals, the day of February 5th, 2019 for example, belongs to a bucket that starts on December 20th, 2018 and {es} (and implicitly {es-sql}) would diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec index 5eb7e0054c4..9c23a832ec8 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec @@ -531,6 +531,30 @@ null |10 |null 1953-11-01T00:00:00.000Z|1 |1953-11-07T00:00:00.000Z ; +histogramOneDay +schema::h:ts|c:l|birth_date:ts +SELECT HISTOGRAM(birth_date, INTERVAL 1 DAY) AS h, COUNT(*) as c, birth_date FROM test_emp WHERE YEAR(birth_date) BETWEEN 1959 AND 1960 GROUP BY h, birth_date ORDER BY h ASC; + + h | c | birth_date +------------------------+---------------+------------------------ +1959-01-27T00:00:00.000Z|1 |1959-01-27T00:00:00.000Z +1959-04-07T00:00:00.000Z|1 |1959-04-07T00:00:00.000Z +1959-07-23T00:00:00.000Z|2 |1959-07-23T00:00:00.000Z +1959-08-10T00:00:00.000Z|1 |1959-08-10T00:00:00.000Z +1959-08-19T00:00:00.000Z|1 |1959-08-19T00:00:00.000Z +1959-10-01T00:00:00.000Z|1 |1959-10-01T00:00:00.000Z +1959-12-03T00:00:00.000Z|1 |1959-12-03T00:00:00.000Z +1959-12-25T00:00:00.000Z|1 |1959-12-25T00:00:00.000Z +1960-02-20T00:00:00.000Z|1 |1960-02-20T00:00:00.000Z +1960-03-09T00:00:00.000Z|1 |1960-03-09T00:00:00.000Z +1960-05-25T00:00:00.000Z|1 |1960-05-25T00:00:00.000Z +1960-07-20T00:00:00.000Z|1 |1960-07-20T00:00:00.000Z +1960-08-09T00:00:00.000Z|1 |1960-08-09T00:00:00.000Z +1960-09-06T00:00:00.000Z|1 |1960-09-06T00:00:00.000Z +1960-10-04T00:00:00.000Z|1 |1960-10-04T00:00:00.000Z +1960-12-17T00:00:00.000Z|1 |1960-12-17T00:00:00.000Z +; + histogramDateTimeWithMonthOnTop schema::h:i|c:l SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java 
b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java index e6ba5f6326c..623822ec713 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java @@ -31,6 +31,7 @@ public class Histogram extends GroupingFunction { private final ZoneId zoneId; public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString(); public static String MONTH_INTERVAL = DateHistogramInterval.MONTH.toString(); + public static String DAY_INTERVAL = DateHistogramInterval.DAY.toString(); public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) { super(source, field, Collections.singletonList(interval)); diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java index f6987e88c9d..83b5ae9d626 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java @@ -41,6 +41,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.CompoundNumeric import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram; import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction; +import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalDayTime; import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalYearMonth; import org.elasticsearch.xpack.sql.expression.literal.interval.Intervals; import org.elasticsearch.xpack.sql.plan.logical.Pivot; @@ -80,6 +81,7 @@ import org.elasticsearch.xpack.sql.session.EmptyExecutable; import 
org.elasticsearch.xpack.sql.util.Check; import org.elasticsearch.xpack.sql.util.DateUtils; +import java.time.Duration; import java.time.Period; import java.util.ArrayList; import java.util.Arrays; @@ -90,6 +92,7 @@ import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicReference; import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine; +import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.DAY_INTERVAL; import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.MONTH_INTERVAL; import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.YEAR_INTERVAL; import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toAgg; @@ -332,14 +335,24 @@ class QueryFolder extends RuleExecutor { // When the histogram is `INTERVAL '1' YEAR` or `INTERVAL '1' MONTH`, the interval used in // the ES date_histogram will be a calendar_interval with value "1y" or "1M" respectively. - // All other intervals will be fixed_intervals expressed in ms. if (field instanceof FieldAttribute) { key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), calendarInterval, h.zoneId()); } else if (field instanceof Function) { key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), calendarInterval, h.zoneId()); } } - // typical interval + // interval of exactly 1 day + else if (value instanceof IntervalDayTime + && ((IntervalDayTime) value).interval().equals(Duration.ofDays(1))) { + // When the histogram is `INTERVAL '1' DAY` the interval used in + // the ES date_histogram will be a calendar_interval with value "1d" + if (field instanceof FieldAttribute) { + key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), DAY_INTERVAL, h.zoneId()); + } else if (field instanceof Function) { + key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), DAY_INTERVAL, h.zoneId()); + } + } + // All other intervals will be fixed_intervals expressed in ms. 
else { long intervalAsMillis = Intervals.inMillis(h.interval()); diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 16c6237dd05..0b5716c34a9 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -1078,6 +1078,45 @@ public class QueryTranslatorTests extends ESTestCase { + "\"fixed_interval\":\"12960000000ms\",\"time_zone\":\"Z\"}}}]}}}")); } + public void testGroupByOneDayHistogramQueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 1 DAY) AS h FROM test GROUP BY h"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("h", eqe.output().get(0).qualifiedName()); + assertEquals(DATETIME, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\"," + + "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}")); + } + + public void testGroupByMoreDaysHistogramQueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL '1 5' DAY TO HOUR) AS h FROM test GROUP BY h"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("h", eqe.output().get(0).qualifiedName()); + assertEquals(DATETIME, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\"," + + 
"\"fixed_interval\":\"104400000ms\",\"time_zone\":\"Z\"}}}]}}}")); + } + + public void testGroupByMoreDaysHistogram_WithFunction_QueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date + INTERVAL 5 DAYS, INTERVAL 1 DAY) AS h FROM test GROUP BY h"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("h", eqe.output().get(0).qualifiedName()); + assertEquals(DATETIME, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.add(" + + "InternalSqlScriptUtils.docValue(doc,params.v0),InternalSqlScriptUtils.intervalDayTime(params.v1,params.v2))\"," + + "\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"PT120H\",\"v2\":\"INTERVAL_DAY\"}}," + + "\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," + + "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}")); + } + public void testGroupByYearAndScalarsQueryTranslator() { PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1"); assertEquals(EsQueryExec.class, p.getClass());