From ed6b10bc038d7658dca157bcf4d74625855ca9b2 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 25 Feb 2020 01:41:51 +0200 Subject: [PATCH] SQL: use a calendar interval for histograms over 1 month intervals (#52586) (#52715) (cherry picked from commit 928b11a34ec92d90d082abdf4fa09f7ce1d7c0c4) --- .../reference/sql/functions/grouping.asciidoc | 4 +-- .../sql/qa/src/main/resources/agg.csv-spec | 28 +++++++++++++++++++ .../function/grouping/Histogram.java | 3 ++ .../function/scalar/datetime/Year.java | 6 ++-- .../xpack/sql/planner/QueryFolder.java | 20 +++++++------ .../sql/planner/QueryTranslatorTests.java | 24 ++++++++++++++++ 6 files changed, 71 insertions(+), 14 deletions(-) diff --git a/docs/reference/sql/functions/grouping.asciidoc b/docs/reference/sql/functions/grouping.asciidoc index fe0f4c08d8c..0de9e396fdd 100644 --- a/docs/reference/sql/functions/grouping.asciidoc +++ b/docs/reference/sql/functions/grouping.asciidoc @@ -87,8 +87,8 @@ actually used will be `INTERVAL '2' DAY`. If the interval specified is less than [IMPORTANT] All intervals specified for a date/time HISTOGRAM will use a <<search-aggregations-bucket-datehistogram-aggregation,fixed interval>> -in their `date_histogram` aggregation definition, with the notable exception of `INTERVAL '1' YEAR` where a calendar interval is used. -The choice for a calendar interval was made for having a more intuitive result for YEAR groupings. Calendar intervals consider a one year +in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR` and `INTERVAL '1' MONTH` where a calendar interval is used. +The choice for a calendar interval was made for having a more intuitive result for YEAR and MONTH groupings. In the case of YEAR, for example, the calendar intervals consider a one year bucket as the one starting on January 1st that specific year, whereas a fixed interval one-year-bucket considers one year as a number of milliseconds (for example, `31536000000ms` corresponding to 365 days, 24 hours per day, 60 minutes per hour etc.). 
With fixed intervals, the day of February 5th, 2019 for example, belongs to a bucket that starts on December 20th, 2018 and {es} (and implicitly {es-sql}) would diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec index da9fe1cd7c8..5eb7e0054c4 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec @@ -503,6 +503,34 @@ SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp null |10 ; +histogramOneMonth +schema::h:ts|c:l|birth_date:ts +SELECT HISTOGRAM(birth_date, INTERVAL 1 MONTH) AS h, COUNT(*) as c, birth_date FROM test_emp GROUP BY h, birth_date HAVING c >= 1 ORDER BY h ASC LIMIT 20; + + h | c | birth_date +------------------------+---------------+------------------------ +null |10 |null +1952-02-01T00:00:00.000Z|1 |1952-02-27T00:00:00.000Z +1952-04-01T00:00:00.000Z|1 |1952-04-19T00:00:00.000Z +1952-05-01T00:00:00.000Z|1 |1952-05-15T00:00:00.000Z +1952-06-01T00:00:00.000Z|1 |1952-06-13T00:00:00.000Z +1952-07-01T00:00:00.000Z|1 |1952-07-08T00:00:00.000Z +1952-08-01T00:00:00.000Z|1 |1952-08-06T00:00:00.000Z +1952-11-01T00:00:00.000Z|1 |1952-11-13T00:00:00.000Z +1952-12-01T00:00:00.000Z|1 |1952-12-24T00:00:00.000Z +1953-01-01T00:00:00.000Z|1 |1953-01-07T00:00:00.000Z +1953-01-01T00:00:00.000Z|1 |1953-01-23T00:00:00.000Z +1953-02-01T00:00:00.000Z|1 |1953-02-08T00:00:00.000Z +1953-04-01T00:00:00.000Z|1 |1953-04-03T00:00:00.000Z +1953-04-01T00:00:00.000Z|1 |1953-04-20T00:00:00.000Z +1953-04-01T00:00:00.000Z|1 |1953-04-21T00:00:00.000Z +1953-07-01T00:00:00.000Z|1 |1953-07-28T00:00:00.000Z +1953-09-01T00:00:00.000Z|1 |1953-09-02T00:00:00.000Z +1953-09-01T00:00:00.000Z|1 |1953-09-19T00:00:00.000Z +1953-09-01T00:00:00.000Z|1 |1953-09-29T00:00:00.000Z +1953-11-01T00:00:00.000Z|1 |1953-11-07T00:00:00.000Z +; + histogramDateTimeWithMonthOnTop schema::h:i|c:l SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as 
c FROM test_emp GROUP BY h ORDER BY h DESC; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java index b7ec8c1d5e5..e6ba5f6326c 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.sql.expression.function.grouping; +import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.Expressions.ParamOrdinal; import org.elasticsearch.xpack.ql.expression.Literal; @@ -28,6 +29,8 @@ public class Histogram extends GroupingFunction { private final Literal interval; private final ZoneId zoneId; + public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString(); + public static String MONTH_INTERVAL = DateHistogramInterval.MONTH.toString(); public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) { super(source, field, Collections.singletonList(interval)); diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java index fd48fedd7f4..ed7fb93debc 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java @@ -5,10 +5,10 @@ */ package org.elasticsearch.xpack.sql.expression.function.scalar.datetime; -import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import 
org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.tree.NodeInfo.NodeCtor2; import org.elasticsearch.xpack.ql.tree.Source; +import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram; import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeProcessor.DateTimeExtractor; import java.time.ZoneId; @@ -18,8 +18,6 @@ import java.time.ZoneId; */ public class Year extends DateTimeHistogramFunction { - public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString(); - public Year(Source source, Expression field, ZoneId zoneId) { super(source, field, zoneId, DateTimeExtractor.YEAR); } @@ -41,6 +39,6 @@ public class Year extends DateTimeHistogramFunction { @Override public String calendarInterval() { - return YEAR_INTERVAL; + return Histogram.YEAR_INTERVAL; } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java index 92c50652b84..f6987e88c9d 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java @@ -41,7 +41,6 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.CompoundNumeric import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram; import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction; -import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.Year; import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalYearMonth; import org.elasticsearch.xpack.sql.expression.literal.interval.Intervals; import org.elasticsearch.xpack.sql.plan.logical.Pivot; @@ -91,6 +90,8 @@ import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicReference; import static 
org.elasticsearch.xpack.ql.util.CollectionUtils.combine; +import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.MONTH_INTERVAL; +import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.YEAR_INTERVAL; import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toAgg; import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toQuery; import static org.elasticsearch.xpack.sql.type.SqlDataTypes.DATE; @@ -283,7 +284,6 @@ class QueryFolder extends RuleExecutor { field = field.exactAttribute(); key = new GroupByValue(aggId, field.name()); } - // handle functions else if (exp instanceof Function) { // dates are handled differently because of date histograms @@ -322,13 +322,17 @@ class QueryFolder extends RuleExecutor { // date histogram if (isDateBased(h.dataType())) { Object value = h.interval().value(); - // interval of exactly 1 year - if (value instanceof IntervalYearMonth - && ((IntervalYearMonth) value).interval().equals(Period.ofYears(1))) { - String calendarInterval = Year.YEAR_INTERVAL; - // When the histogram is `INTERVAL '1' YEAR`, the interval used in the ES date_histogram will be - // a calendar_interval with value "1y". All other intervals will be fixed_intervals expressed in ms. + // interval of exactly 1 year or 1 month + if (value instanceof IntervalYearMonth && + (((IntervalYearMonth) value).interval().equals(Period.ofYears(1)) + || ((IntervalYearMonth) value).interval().equals(Period.ofMonths(1)))) { + Period yearMonth = ((IntervalYearMonth) value).interval(); + String calendarInterval = yearMonth.equals(Period.ofYears(1)) ? YEAR_INTERVAL : MONTH_INTERVAL; + + // When the histogram is `INTERVAL '1' YEAR` or `INTERVAL '1' MONTH`, the interval used in + // the ES date_histogram will be a calendar_interval with value "1y" or "1M" respectively. + // All other intervals will be fixed_intervals expressed in ms. 
if (field instanceof FieldAttribute) { key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), calendarInterval, h.zoneId()); } else if (field instanceof Function) { diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 9da1ad56063..16c6237dd05 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -1053,6 +1053,30 @@ public class QueryTranslatorTests extends ESTestCase { endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\"," + "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}")); } + + public void testGroupByOneMonthHistogramQueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test GROUP BY h"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("h", eqe.output().get(0).qualifiedName()); + assertEquals(DATETIME, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\"," + + "\"calendar_interval\":\"1M\",\"time_zone\":\"Z\"}}}]}}}")); + } + + public void testGroupByMoreMonthsHistogramQueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 5 MONTH) AS h FROM test GROUP BY h"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("h", eqe.output().get(0).qualifiedName()); + assertEquals(DATETIME, eqe.output().get(0).dataType()); + 
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\"," + + "\"fixed_interval\":\"12960000000ms\",\"time_zone\":\"Z\"}}}]}}}")); + } public void testGroupByYearAndScalarsQueryTranslator() { PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");