From 52f7ba8c5d55991fc75624b364fc68f700c9eef0 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 23 Jan 2018 10:59:44 -0500 Subject: [PATCH] SQL: Docs for basic aggregations Adds documentation for basic aggregate functions supported by Elasticsearch SQL. Relates to elastic/x-pack-elasticsearch#2898 Original commit: elastic/x-pack-elasticsearch@ddc71165f26f6869c37a54d3e561537007d6f081 --- docs/en/sql/functions/index.asciidoc | 47 ++++++++++++++++++- .../expression/function/FunctionRegistry.java | 2 +- .../expression/function/aggregate/Avg.java | 3 ++ .../expression/function/aggregate/Count.java | 5 ++ .../expression/function/aggregate/Max.java | 3 ++ .../expression/function/aggregate/Min.java | 3 ++ .../expression/function/aggregate/Sum.java | 3 ++ qa/sql/src/main/resources/agg.csv-spec | 40 ++++++++++------ qa/sql/src/main/resources/agg.sql-spec | 21 +++++++-- 9 files changed, 106 insertions(+), 21 deletions(-) diff --git a/docs/en/sql/functions/index.asciidoc b/docs/en/sql/functions/index.asciidoc index f991837ca2c..b92e68b338c 100644 --- a/docs/en/sql/functions/index.asciidoc +++ b/docs/en/sql/functions/index.asciidoc @@ -326,7 +326,50 @@ include-tagged::{sql-specs}/datetime.csv-spec[minuteOfHour] include-tagged::{sql-specs}/datetime.csv-spec[secondOfMinute] -------------------------------------------------- +[[sql-functions-aggregate]] +=== Aggregate Functions -// aggregate +==== Basic -// geospatial +* https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (`AVG`) + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.sql-spec[avg] +-------------------------------------------------- + +* Count the number of matching fields (`COUNT`) + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.sql-spec[countStar] +-------------------------------------------------- + +* Count the number of distinct 
values in matching documents (`COUNT(DISTINCT`) + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.sql-spec[countDistinct] +-------------------------------------------------- + +* Find the maximum value in matching documents (`MAX`) + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.sql-spec[max] +-------------------------------------------------- + +* Find the minimum value in matching documents (`MIN`) + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.sql-spec[min] +-------------------------------------------------- + +* https://en.wikipedia.org/wiki/Kahan_summation_algorithm[Sum] +all values of matching documents (`SUM`). + +["source","sql",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{sql-specs}/agg.csv-spec[sum] +-------------------------------------------------- diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java index 1699cb05247..5c01f3f252f 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java @@ -85,7 +85,7 @@ public class FunctionRegistry { def(Min.class, Min::new), def(Sum.class, Sum::new), // Statistics - def(Mean.class, Mean::new), + def(Mean.class, Mean::new), // TODO can we just use Avg? 
def(StddevPop.class, StddevPop::new), def(VarPop.class, VarPop::new), def(Percentile.class, Percentile::new), diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java index 7298f8eb767..7c413feba84 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java @@ -12,6 +12,9 @@ import org.elasticsearch.xpack.sql.tree.Location; import org.elasticsearch.xpack.sql.tree.NodeInfo; import org.elasticsearch.xpack.sql.type.DataType; +/** + * Find the arithmetic mean of a field. + */ public class Avg extends NumericAggregate implements EnclosedAgg { public Avg(Location location, Expression field) { diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java index c8026180c5c..b646b41e6d2 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java @@ -14,6 +14,11 @@ import org.elasticsearch.xpack.sql.tree.NodeInfo; import org.elasticsearch.xpack.sql.type.DataType; import org.elasticsearch.xpack.sql.type.DataTypes; +/** + * Count the number of documents matched ({@code COUNT}) + * OR count the number of distinct values + * for a field that matched ({@code COUNT(DISTINCT}).
+ */ public class Count extends AggregateFunction { private final boolean distinct; diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java index 0578216979d..6b710cf06d5 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java @@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location; import org.elasticsearch.xpack.sql.tree.NodeInfo; import org.elasticsearch.xpack.sql.type.DataType; +/** + * Find the maximum value in matching documents. + */ public class Max extends NumericAggregate implements EnclosedAgg { public Max(Location location, Expression field) { diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java index ec6e60f3b1f..16adf6461e1 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java @@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location; import org.elasticsearch.xpack.sql.tree.NodeInfo; import org.elasticsearch.xpack.sql.type.DataType; +/** + * Find the minimum value in matched documents. 
+ */ public class Min extends NumericAggregate implements EnclosedAgg { public Min(Location location, Expression field) { diff --git a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java index 9fed861c947..25861542509 100644 --- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java +++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java @@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location; import org.elasticsearch.xpack.sql.tree.NodeInfo; import org.elasticsearch.xpack.sql.type.DataType; +/** + * Sum all values of a field in matching documents. + */ public class Sum extends NumericAggregate implements EnclosedAgg { public Sum(Location location, Expression field) { diff --git a/qa/sql/src/main/resources/agg.csv-spec b/qa/sql/src/main/resources/agg.csv-spec index b84ba44bffc..d12f1c3f4b6 100644 --- a/qa/sql/src/main/resources/agg.csv-spec +++ b/qa/sql/src/main/resources/agg.csv-spec @@ -7,7 +7,7 @@ SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_emp GROUP BY gender; gender:s | p1:d M | 10095.6112 -F | 10099.1936 +F | 10099.1936 ; singlePercentileWithComma @@ -15,53 +15,63 @@ SELECT gender, PERCENTILE(emp_no, 97.76) p1 FROM test_emp GROUP BY gender; gender:s | p1:d M | 10095.6112 -F | 10099.1936 +F | 10099.1936 ; multiplePercentilesOneWithCommaOneWithout SELECT gender, PERCENTILE(emp_no, 92.45) p1, PERCENTILE(emp_no, 91) p2 FROM test_emp GROUP BY gender; gender:s | p1:d | p2:d -M | 10090.319 | 10087.68 -F | 10095.128 | 10093.52 +M | 10090.319 | 10087.68 +F | 10095.128 | 10093.52 ; multiplePercentilesWithoutComma SELECT gender, PERCENTILE(emp_no, 91) p1, PERCENTILE(emp_no, 89) p2 FROM test_emp GROUP BY gender; gender:s | p1:d | p2:d -M | 10087.68 | 10085.18 -F | 10093.52 | 10092.08 +M | 10087.68 | 10085.18 +F | 10093.52 | 10092.08 ; 
multiplePercentilesWithComma SELECT gender, PERCENTILE(emp_no, 85.7) p1, PERCENTILE(emp_no, 94.3) p2 FROM test_emp GROUP BY gender; gender:s | p1:d | p2:d -M | 10083.134 | 10091.932 -F | 10088.852 | 10097.792 +M | 10083.134 | 10091.932 +F | 10088.852 | 10097.792 ; percentileRank SELECT gender, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender; gender:s | rank:d -M | 23.41269841269841 -F | 26.351351351351347 +M | 23.41269841269841 +F | 26.351351351351347 ; multiplePercentileRanks SELECT gender, PERCENTILE_RANK(emp_no, 10030.0) rank1, PERCENTILE_RANK(emp_no, 10025) rank2 FROM test_emp GROUP BY gender; gender:s | rank1:d | rank2:d -M | 29.365079365079367 | 23.41269841269841 -F | 29.93762993762994 | 26.351351351351347 +M | 29.365079365079367 | 23.41269841269841 +F | 29.93762993762994 | 26.351351351351347 ; multiplePercentilesAndPercentileRank SELECT gender, PERCENTILE(emp_no, 97.76) p1, PERCENTILE(emp_no, 93.3) p2, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender; gender:s | p1:d | p2:d | rank:d -M | 10095.6112 | 10090.846 | 23.41269841269841 -F | 10099.1936 | 10096.351999999999 | 26.351351351351347 -; \ No newline at end of file +M | 10095.6112 | 10090.846 | 23.41269841269841 +F | 10099.1936 | 10096.351999999999 | 26.351351351351347 +; + +// Simple sum used in documentation +sum +// tag::sum +SELECT SUM(salary) FROM test_emp; +// end::sum + SUM(salary) +--------------- +4824855 +; diff --git a/qa/sql/src/main/resources/agg.sql-spec b/qa/sql/src/main/resources/agg.sql-spec index 00e691cf378..05405133a33 100644 --- a/qa/sql/src/main/resources/agg.sql-spec +++ b/qa/sql/src/main/resources/agg.sql-spec @@ -49,7 +49,9 @@ SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e; // COUNT aggCountImplicit -SELECT COUNT(*) c FROM "test_emp"; +// tag::countStar +SELECT COUNT(*) AS count FROM test_emp; +// end::countStar aggCountImplicitWithCast SELECT CAST(COUNT(*) AS INT) c FROM "test_emp"; aggCountImplicitWithConstant @@ -64,6 +66,11 
@@ aggCountAliasWithCastAndFilter SELECT gender g, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender; aggCountWithAlias SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g; +countDistinct +// tag::countDistinct +SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp; +// end::countDistinct + // Conditional COUNT aggCountAndHaving @@ -97,7 +104,9 @@ SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND CO // MIN aggMinImplicit -SELECT MIN(emp_no) m FROM "test_emp"; +// tag::min +SELECT MIN(emp_no) AS min FROM test_emp; +// end::min aggMinImplicitWithCast SELECT CAST(MIN(emp_no) AS SMALLINT) m FROM "test_emp"; aggMin @@ -133,7 +142,9 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN( // MAX aggMaxImplicit -SELECT MAX(emp_no) c FROM "test_emp"; +// tag::max +SELECT MAX(salary) AS max FROM test_emp; +// end::max aggMaxImplicitWithCast SELECT CAST(MAX(emp_no) AS SMALLINT) c FROM "test_emp"; aggMax @@ -203,6 +214,10 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY gender; // casting to an exact type - varchar, bigint, etc... will likely fail due to rounding error aggAvgWithCastToDouble SELECT gender g, CAST(AVG(emp_no) AS DOUBLE) a FROM "test_emp" GROUP BY gender; +aggAvg +// tag::avg +SELECT AVG(salary) AS avg FROM test_emp; +// end::avg aggAvgWithCastAndCount SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a, COUNT(1) c FROM "test_emp" GROUP BY gender; aggAvgWithCastAndCountWithFilter