SQL: Docs for basic aggregations

Adds documentation for basic aggregate functions supported by
Elasticsearch SQL.

Relates to elastic/x-pack-elasticsearch#2898

Original commit: elastic/x-pack-elasticsearch@ddc71165f2
This commit is contained in:
Nik Everett 2018-01-23 10:59:44 -05:00
parent 7d429a74b7
commit 52f7ba8c5d
9 changed files with 106 additions and 21 deletions

View File

@ -326,7 +326,50 @@ include-tagged::{sql-specs}/datetime.csv-spec[minuteOfHour]
include-tagged::{sql-specs}/datetime.csv-spec[secondOfMinute]
--------------------------------------------------
[[sql-functions-aggregate]]
=== Aggregate Functions
// aggregate
==== Basic
// geospatial
* https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (`AVG`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.sql-spec[avg]
--------------------------------------------------
* Count the number of matching documents (`COUNT`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.sql-spec[countStar]
--------------------------------------------------
* Count the number of distinct values in matching documents (`COUNT(DISTINCT`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.sql-spec[countDistinct]
--------------------------------------------------
* Find the maximum value in matching documents (`MAX`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.sql-spec[max]
--------------------------------------------------
* Find the minimum value in matching documents (`MIN`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.sql-spec[min]
--------------------------------------------------
* https://en.wikipedia.org/wiki/Kahan_summation_algorithm[Sum]
all values of matching documents (`SUM`)
["source","sql",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/agg.csv-spec[sum]
--------------------------------------------------

View File

@ -85,7 +85,7 @@ public class FunctionRegistry {
def(Min.class, Min::new),
def(Sum.class, Sum::new),
// Statistics
def(Mean.class, Mean::new),
def(Mean.class, Mean::new), // TODO can we just use Avg?
def(StddevPop.class, StddevPop::new),
def(VarPop.class, VarPop::new),
def(Percentile.class, Percentile::new),

View File

@ -12,6 +12,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
* Find the arithmetic mean of a field.
*/
public class Avg extends NumericAggregate implements EnclosedAgg {
public Avg(Location location, Expression field) {

View File

@ -14,6 +14,11 @@ import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;
/**
* Count the number of documents matched ({@code COUNT})
* <strong>OR</strong> count the number of distinct values
* for a field that matched ({@code COUNT(DISTINCT}).
*/
public class Count extends AggregateFunction {
private final boolean distinct;

View File

@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
* Find the maximum value in matching documents.
*/
public class Max extends NumericAggregate implements EnclosedAgg {
public Max(Location location, Expression field) {

View File

@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
* Find the minimum value in matching documents.
*/
public class Min extends NumericAggregate implements EnclosedAgg {
public Min(Location location, Expression field) {

View File

@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.type.DataType;
/**
* Sum all values of a field in matching documents.
*/
public class Sum extends NumericAggregate implements EnclosedAgg {
public Sum(Location location, Expression field) {

View File

@ -7,7 +7,7 @@ SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_emp GROUP BY gender;
gender:s | p1:d
M | 10095.6112
F | 10099.1936
F | 10099.1936
;
singlePercentileWithComma
@ -15,53 +15,63 @@ SELECT gender, PERCENTILE(emp_no, 97.76) p1 FROM test_emp GROUP BY gender;
gender:s | p1:d
M | 10095.6112
F | 10099.1936
F | 10099.1936
;
multiplePercentilesOneWithCommaOneWithout
SELECT gender, PERCENTILE(emp_no, 92.45) p1, PERCENTILE(emp_no, 91) p2 FROM test_emp GROUP BY gender;
gender:s | p1:d | p2:d
M | 10090.319 | 10087.68
F | 10095.128 | 10093.52
M | 10090.319 | 10087.68
F | 10095.128 | 10093.52
;
multiplePercentilesWithoutComma
SELECT gender, PERCENTILE(emp_no, 91) p1, PERCENTILE(emp_no, 89) p2 FROM test_emp GROUP BY gender;
gender:s | p1:d | p2:d
M | 10087.68 | 10085.18
F | 10093.52 | 10092.08
M | 10087.68 | 10085.18
F | 10093.52 | 10092.08
;
multiplePercentilesWithComma
SELECT gender, PERCENTILE(emp_no, 85.7) p1, PERCENTILE(emp_no, 94.3) p2 FROM test_emp GROUP BY gender;
gender:s | p1:d | p2:d
M | 10083.134 | 10091.932
F | 10088.852 | 10097.792
M | 10083.134 | 10091.932
F | 10088.852 | 10097.792
;
percentileRank
SELECT gender, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;
gender:s | rank:d
M | 23.41269841269841
F | 26.351351351351347
M | 23.41269841269841
F | 26.351351351351347
;
multiplePercentileRanks
SELECT gender, PERCENTILE_RANK(emp_no, 10030.0) rank1, PERCENTILE_RANK(emp_no, 10025) rank2 FROM test_emp GROUP BY gender;
gender:s | rank1:d | rank2:d
M | 29.365079365079367 | 23.41269841269841
F | 29.93762993762994 | 26.351351351351347
M | 29.365079365079367 | 23.41269841269841
F | 29.93762993762994 | 26.351351351351347
;
multiplePercentilesAndPercentileRank
SELECT gender, PERCENTILE(emp_no, 97.76) p1, PERCENTILE(emp_no, 93.3) p2, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;
gender:s | p1:d | p2:d | rank:d
M | 10095.6112 | 10090.846 | 23.41269841269841
F | 10099.1936 | 10096.351999999999 | 26.351351351351347
;
M | 10095.6112 | 10090.846 | 23.41269841269841
F | 10099.1936 | 10096.351999999999 | 26.351351351351347
;
// Simple sum used in documentation
sum
// tag::sum
SELECT SUM(salary) FROM test_emp;
// end::sum
SUM(salary)
---------------
4824855
;

View File

@ -49,7 +49,9 @@ SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e;
// COUNT
aggCountImplicit
SELECT COUNT(*) c FROM "test_emp";
// tag::countStar
SELECT COUNT(*) AS count FROM test_emp;
// end::countStar
aggCountImplicitWithCast
SELECT CAST(COUNT(*) AS INT) c FROM "test_emp";
aggCountImplicitWithConstant
@ -64,6 +66,11 @@ aggCountAliasWithCastAndFilter
SELECT gender g, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender;
aggCountWithAlias
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g;
countDistinct
// tag::countDistinct
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
// end::countDistinct
// Conditional COUNT
aggCountAndHaving
@ -97,7 +104,9 @@ SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND CO
// MIN
aggMinImplicit
SELECT MIN(emp_no) m FROM "test_emp";
// tag::min
SELECT MIN(emp_no) AS min FROM test_emp;
// end::min
aggMinImplicitWithCast
SELECT CAST(MIN(emp_no) AS SMALLINT) m FROM "test_emp";
aggMin
@ -133,7 +142,9 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN(
// MAX
aggMaxImplicit
SELECT MAX(emp_no) c FROM "test_emp";
// tag::max
SELECT MAX(salary) AS max FROM test_emp;
// end::max
aggMaxImplicitWithCast
SELECT CAST(MAX(emp_no) AS SMALLINT) c FROM "test_emp";
aggMax
@ -203,6 +214,10 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY gender;
// casting to an exact type - varchar, bigint, etc... will likely fail due to rounding error
aggAvgWithCastToDouble
SELECT gender g, CAST(AVG(emp_no) AS DOUBLE) a FROM "test_emp" GROUP BY gender;
aggAvg
// tag::avg
SELECT AVG(salary) AS avg FROM test_emp;
// end::avg
aggAvgWithCastAndCount
SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a, COUNT(1) c FROM "test_emp" GROUP BY gender;
aggAvgWithCastAndCountWithFilter