SQL: Docs for basic aggregations

Adds documentation for basic aggregate functions supported by Elasticsearch SQL. Relates to elastic/x-pack-elasticsearch#2898 Original commit: elastic/x-pack-elasticsearch@ddc71165f2
2018-01-23 10:59:44 -05:00 · 2018-01-23 10:59:44 -05:00 · 52f7ba8c5d
parent 7d429a74b7
commit 52f7ba8c5d
9 changed files with 106 additions and 21 deletions
--- a/docs/en/sql/functions/index.asciidoc
+++ b/docs/en/sql/functions/index.asciidoc
@ -326,7 +326,50 @@ include-tagged::{sql-specs}/datetime.csv-spec[minuteOfHour]
 include-tagged::{sql-specs}/datetime.csv-spec[secondOfMinute]
 --------------------------------------------------

+[[sql-functions-aggregate]]
+=== Aggregate Functions

-// aggregate
+==== Basic

-// geospatial
+* https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (`AVG`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.sql-spec[avg]
+--------------------------------------------------
+
+* Count the number of matching documents (`COUNT`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.sql-spec[countStar]
+--------------------------------------------------
+
+* Count the number of distinct values in matching documents (`COUNT(DISTINCT ...)`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.sql-spec[countDistinct]
+--------------------------------------------------
+
+* Find the maximum value in matching documents (`MAX`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.sql-spec[max]
+--------------------------------------------------
+
+* Find the minimum value in matching documents (`MIN`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.sql-spec[min]
+--------------------------------------------------
+
+* https://en.wikipedia.org/wiki/Kahan_summation_algorithm[Sum]
+all values of matching documents (`SUM`)
+
+["source","sql",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{sql-specs}/agg.csv-spec[sum]
+--------------------------------------------------
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java
@ -85,7 +85,7 @@ public class FunctionRegistry {
            def(Min.class, Min::new),
            def(Sum.class, Sum::new),
            // Statistics
-            def(Mean.class, Mean::new),
+            def(Mean.class, Mean::new), // TODO can we just use Avg?
            def(StddevPop.class, StddevPop::new),
            def(VarPop.class, VarPop::new),
            def(Percentile.class, Percentile::new),
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Avg.java
@ -12,6 +12,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
 import org.elasticsearch.xpack.sql.tree.NodeInfo;
 import org.elasticsearch.xpack.sql.type.DataType;

+/**
+ * Find the arithmetic mean of a field.
+ */
 public class Avg extends NumericAggregate implements EnclosedAgg {

    public Avg(Location location, Expression field) {
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Count.java
@ -14,6 +14,11 @@ import org.elasticsearch.xpack.sql.tree.NodeInfo;
 import org.elasticsearch.xpack.sql.type.DataType;
 import org.elasticsearch.xpack.sql.type.DataTypes;

+/**
+ * Count the number of documents matched ({@code COUNT})
+ * <strong>OR</strong> count the number of distinct values
+ * for a field that matched ({@code COUNT(DISTINCT ...)}).
+ */
 public class Count extends AggregateFunction {

    private final boolean distinct;
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
 import org.elasticsearch.xpack.sql.tree.NodeInfo;
 import org.elasticsearch.xpack.sql.type.DataType;

+/**
+ * Find the maximum value in matching documents.
+ */
 public class Max extends NumericAggregate implements EnclosedAgg {

    public Max(Location location, Expression field) {
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
 import org.elasticsearch.xpack.sql.tree.NodeInfo;
 import org.elasticsearch.xpack.sql.type.DataType;

+/**
+ * Find the minimum value in matching documents.
+ */
 public class Min extends NumericAggregate implements EnclosedAgg {

    public Min(Location location, Expression field) {
--- a/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java
+++ b/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Sum.java
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
 import org.elasticsearch.xpack.sql.tree.NodeInfo;
 import org.elasticsearch.xpack.sql.type.DataType;

+/**
+ * Sum all values of a field in matching documents.
+ */
 public class Sum extends NumericAggregate implements EnclosedAgg {

    public Sum(Location location, Expression field) {
--- a/qa/sql/src/main/resources/agg.csv-spec
+++ b/qa/sql/src/main/resources/agg.csv-spec
@ -7,7 +7,7 @@ SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_emp GROUP BY gender;

 gender:s             | p1:d
 M                    | 10095.6112
-F                    | 10099.1936             
+F                    | 10099.1936
 ;

 singlePercentileWithComma
@ -15,53 +15,63 @@ SELECT gender, PERCENTILE(emp_no, 97.76) p1 FROM test_emp GROUP BY gender;

 gender:s             | p1:d
 M                    | 10095.6112
-F                    | 10099.1936             
+F                    | 10099.1936
 ;

 multiplePercentilesOneWithCommaOneWithout
 SELECT gender, PERCENTILE(emp_no, 92.45) p1, PERCENTILE(emp_no, 91) p2 FROM test_emp GROUP BY gender;

 gender:s             | p1:d                 | p2:d
-M                    | 10090.319            | 10087.68               
-F                    | 10095.128            | 10093.52     
+M                    | 10090.319            | 10087.68
+F                    | 10095.128            | 10093.52
 ;

 multiplePercentilesWithoutComma
 SELECT gender, PERCENTILE(emp_no, 91) p1, PERCENTILE(emp_no, 89) p2 FROM test_emp GROUP BY gender;

 gender:s             | p1:d                 | p2:d
-M                    | 10087.68             | 10085.18            
-F                    | 10093.52             | 10092.08  
+M                    | 10087.68             | 10085.18
+F                    | 10093.52             | 10092.08
 ;

 multiplePercentilesWithComma
 SELECT gender, PERCENTILE(emp_no, 85.7) p1, PERCENTILE(emp_no, 94.3) p2 FROM test_emp GROUP BY gender;

 gender:s             | p1:d                 | p2:d
-M                    | 10083.134            | 10091.932               
-F                    | 10088.852            | 10097.792     
+M                    | 10083.134            | 10091.932
+F                    | 10088.852            | 10097.792
 ;

 percentileRank
 SELECT gender, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;

 gender:s             | rank:d
-M                    | 23.41269841269841   
-F                    | 26.351351351351347  
+M                    | 23.41269841269841
+F                    | 26.351351351351347
 ;

 multiplePercentileRanks
 SELECT gender, PERCENTILE_RANK(emp_no, 10030.0) rank1, PERCENTILE_RANK(emp_no, 10025) rank2 FROM test_emp GROUP BY gender;

 gender:s             | rank1:d              | rank2:d
-M                    | 29.365079365079367   | 23.41269841269841   
-F                    | 29.93762993762994    | 26.351351351351347  
+M                    | 29.365079365079367   | 23.41269841269841
+F                    | 29.93762993762994    | 26.351351351351347
 ;

 multiplePercentilesAndPercentileRank
 SELECT gender, PERCENTILE(emp_no, 97.76) p1, PERCENTILE(emp_no, 93.3) p2, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;

 gender:s             | p1:d                 | p2:d                 | rank:d
-M                    | 10095.6112           | 10090.846            | 23.41269841269841   
-F                    | 10099.1936           | 10096.351999999999   | 26.351351351351347  
-;
+M                    | 10095.6112           | 10090.846            | 23.41269841269841
+F                    | 10099.1936           | 10096.351999999999   | 26.351351351351347
+;
+
+// Simple sum used in documentation
+sum
+// tag::sum
+SELECT SUM(salary) FROM test_emp;
+// end::sum
+  SUM(salary)
+---------------
+4824855
+;
--- a/qa/sql/src/main/resources/agg.sql-spec
+++ b/qa/sql/src/main/resources/agg.sql-spec
@ -49,7 +49,9 @@ SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e;

 // COUNT
 aggCountImplicit
-SELECT COUNT(*) c FROM "test_emp";
+// tag::countStar
+SELECT COUNT(*) AS count FROM test_emp;
+// end::countStar
 aggCountImplicitWithCast
 SELECT CAST(COUNT(*) AS INT) c FROM "test_emp";
 aggCountImplicitWithConstant
@ -64,6 +66,11 @@ aggCountAliasWithCastAndFilter
 SELECT gender g, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender;
 aggCountWithAlias
 SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g;
+countDistinct
+// tag::countDistinct
+SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
+// end::countDistinct
+

 // Conditional COUNT
 aggCountAndHaving
@ -97,7 +104,9 @@ SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND CO

 // MIN
 aggMinImplicit
-SELECT MIN(emp_no) m FROM "test_emp";
+// tag::min
+SELECT MIN(emp_no) AS min FROM test_emp;
+// end::min
 aggMinImplicitWithCast
 SELECT CAST(MIN(emp_no) AS SMALLINT) m FROM "test_emp";
 aggMin
@ -133,7 +142,9 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN(

 // MAX
 aggMaxImplicit
-SELECT MAX(emp_no) c FROM "test_emp";
+// tag::max
+SELECT MAX(salary) AS max FROM test_emp;
+// end::max
 aggMaxImplicitWithCast
 SELECT CAST(MAX(emp_no) AS SMALLINT) c FROM "test_emp";
 aggMax
@ -203,6 +214,10 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY gender;
 // casting to an exact type - varchar, bigint, etc... will likely fail due to rounding error
 aggAvgWithCastToDouble
 SELECT gender g, CAST(AVG(emp_no) AS DOUBLE) a FROM "test_emp" GROUP BY gender;
+aggAvg
+// tag::avg
+SELECT AVG(salary) AS avg FROM test_emp;
+// end::avg
 aggAvgWithCastAndCount
 SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a, COUNT(1) c FROM "test_emp" GROUP BY gender;
 aggAvgWithCastAndCountWithFilter