SQL: Docs for basic aggregations
Adds documentation for basic aggregate functions supported by Elasticsearch SQL. Relates to elastic/x-pack-elasticsearch#2898 Original commit: elastic/x-pack-elasticsearch@ddc71165f2
This commit is contained in:
parent
7d429a74b7
commit
52f7ba8c5d
|
@ -326,7 +326,50 @@ include-tagged::{sql-specs}/datetime.csv-spec[minuteOfHour]
|
|||
include-tagged::{sql-specs}/datetime.csv-spec[secondOfMinute]
|
||||
--------------------------------------------------
|
||||
|
||||
[[sql-functions-aggregate]]
|
||||
=== Aggregate Functions
|
||||
|
||||
// aggregate
|
||||
==== Basic
|
||||
|
||||
// geospatial
|
||||
* https://en.wikipedia.org/wiki/Arithmetic_mean[Average] (`AVG`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.sql-spec[avg]
|
||||
--------------------------------------------------
|
||||
|
||||
* Count the number of matching documents (`COUNT`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.sql-spec[countStar]
|
||||
--------------------------------------------------
|
||||
|
||||
* Count the number of distinct values in matching documents (`COUNT(DISTINCT field)`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.sql-spec[countDistinct]
|
||||
--------------------------------------------------
|
||||
|
||||
* Find the maximum value in matching documents (`MAX`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.sql-spec[max]
|
||||
--------------------------------------------------
|
||||
|
||||
* Find the minimum value in matching documents (`MIN`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.sql-spec[min]
|
||||
--------------------------------------------------
|
||||
|
||||
* https://en.wikipedia.org/wiki/Kahan_summation_algorithm[Sum]
|
||||
all values of matching documents (`SUM`)
|
||||
|
||||
["source","sql",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/agg.csv-spec[sum]
|
||||
--------------------------------------------------
|
||||
|
|
|
@ -85,7 +85,7 @@ public class FunctionRegistry {
|
|||
def(Min.class, Min::new),
|
||||
def(Sum.class, Sum::new),
|
||||
// Statistics
|
||||
def(Mean.class, Mean::new),
|
||||
def(Mean.class, Mean::new), // TODO can we just use Avg?
|
||||
def(StddevPop.class, StddevPop::new),
|
||||
def(VarPop.class, VarPop::new),
|
||||
def(Percentile.class, Percentile::new),
|
||||
|
|
|
@ -12,6 +12,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
|
|||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
/**
|
||||
* Find the arithmetic mean of a field.
|
||||
*/
|
||||
public class Avg extends NumericAggregate implements EnclosedAgg {
|
||||
|
||||
public Avg(Location location, Expression field) {
|
||||
|
|
|
@ -14,6 +14,11 @@ import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
|||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
import org.elasticsearch.xpack.sql.type.DataTypes;
|
||||
|
||||
/**
|
||||
* Count the number of documents matched ({@code COUNT})
|
||||
* <strong>OR</strong> count the number of distinct values
|
||||
* for a field that matched ({@code COUNT(DISTINCT field)}).
|
||||
*/
|
||||
public class Count extends AggregateFunction {
|
||||
|
||||
private final boolean distinct;
|
||||
|
|
|
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
|
|||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
/**
|
||||
* Find the maximum value in matching documents.
|
||||
*/
|
||||
public class Max extends NumericAggregate implements EnclosedAgg {
|
||||
|
||||
public Max(Location location, Expression field) {
|
||||
|
|
|
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
|
|||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
/**
|
||||
* Find the minimum value in matching documents.
|
||||
*/
|
||||
public class Min extends NumericAggregate implements EnclosedAgg {
|
||||
|
||||
public Min(Location location, Expression field) {
|
||||
|
|
|
@ -11,6 +11,9 @@ import org.elasticsearch.xpack.sql.tree.Location;
|
|||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
/**
|
||||
* Sum all values of a field in matching documents.
|
||||
*/
|
||||
public class Sum extends NumericAggregate implements EnclosedAgg {
|
||||
|
||||
public Sum(Location location, Expression field) {
|
||||
|
|
|
@ -7,7 +7,7 @@ SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_emp GROUP BY gender;
|
|||
|
||||
gender:s | p1:d
|
||||
M | 10095.6112
|
||||
F | 10099.1936
|
||||
F | 10099.1936
|
||||
;
|
||||
|
||||
singlePercentileWithComma
|
||||
|
@ -15,53 +15,63 @@ SELECT gender, PERCENTILE(emp_no, 97.76) p1 FROM test_emp GROUP BY gender;
|
|||
|
||||
gender:s | p1:d
|
||||
M | 10095.6112
|
||||
F | 10099.1936
|
||||
F | 10099.1936
|
||||
;
|
||||
|
||||
multiplePercentilesOneWithCommaOneWithout
|
||||
SELECT gender, PERCENTILE(emp_no, 92.45) p1, PERCENTILE(emp_no, 91) p2 FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | p1:d | p2:d
|
||||
M | 10090.319 | 10087.68
|
||||
F | 10095.128 | 10093.52
|
||||
M | 10090.319 | 10087.68
|
||||
F | 10095.128 | 10093.52
|
||||
;
|
||||
|
||||
multiplePercentilesWithoutComma
|
||||
SELECT gender, PERCENTILE(emp_no, 91) p1, PERCENTILE(emp_no, 89) p2 FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | p1:d | p2:d
|
||||
M | 10087.68 | 10085.18
|
||||
F | 10093.52 | 10092.08
|
||||
M | 10087.68 | 10085.18
|
||||
F | 10093.52 | 10092.08
|
||||
;
|
||||
|
||||
multiplePercentilesWithComma
|
||||
SELECT gender, PERCENTILE(emp_no, 85.7) p1, PERCENTILE(emp_no, 94.3) p2 FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | p1:d | p2:d
|
||||
M | 10083.134 | 10091.932
|
||||
F | 10088.852 | 10097.792
|
||||
M | 10083.134 | 10091.932
|
||||
F | 10088.852 | 10097.792
|
||||
;
|
||||
|
||||
percentileRank
|
||||
SELECT gender, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | rank:d
|
||||
M | 23.41269841269841
|
||||
F | 26.351351351351347
|
||||
M | 23.41269841269841
|
||||
F | 26.351351351351347
|
||||
;
|
||||
|
||||
multiplePercentileRanks
|
||||
SELECT gender, PERCENTILE_RANK(emp_no, 10030.0) rank1, PERCENTILE_RANK(emp_no, 10025) rank2 FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | rank1:d | rank2:d
|
||||
M | 29.365079365079367 | 23.41269841269841
|
||||
F | 29.93762993762994 | 26.351351351351347
|
||||
M | 29.365079365079367 | 23.41269841269841
|
||||
F | 29.93762993762994 | 26.351351351351347
|
||||
;
|
||||
|
||||
multiplePercentilesAndPercentileRank
|
||||
SELECT gender, PERCENTILE(emp_no, 97.76) p1, PERCENTILE(emp_no, 93.3) p2, PERCENTILE_RANK(emp_no, 10025) rank FROM test_emp GROUP BY gender;
|
||||
|
||||
gender:s | p1:d | p2:d | rank:d
|
||||
M | 10095.6112 | 10090.846 | 23.41269841269841
|
||||
F | 10099.1936 | 10096.351999999999 | 26.351351351351347
|
||||
;
|
||||
M | 10095.6112 | 10090.846 | 23.41269841269841
|
||||
F | 10099.1936 | 10096.351999999999 | 26.351351351351347
|
||||
;
|
||||
|
||||
// Simple sum used in documentation
|
||||
sum
|
||||
// tag::sum
|
||||
SELECT SUM(salary) FROM test_emp;
|
||||
// end::sum
|
||||
SUM(salary)
|
||||
---------------
|
||||
4824855
|
||||
;
|
||||
|
|
|
@ -49,7 +49,9 @@ SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e;
|
|||
|
||||
// COUNT
|
||||
aggCountImplicit
|
||||
SELECT COUNT(*) c FROM "test_emp";
|
||||
// tag::countStar
|
||||
SELECT COUNT(*) AS count FROM test_emp;
|
||||
// end::countStar
|
||||
aggCountImplicitWithCast
|
||||
SELECT CAST(COUNT(*) AS INT) c FROM "test_emp";
|
||||
aggCountImplicitWithConstant
|
||||
|
@ -64,6 +66,11 @@ aggCountAliasWithCastAndFilter
|
|||
SELECT gender g, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender;
|
||||
aggCountWithAlias
|
||||
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g;
|
||||
countDistinct
|
||||
// tag::countDistinct
|
||||
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
|
||||
// end::countDistinct
|
||||
|
||||
|
||||
// Conditional COUNT
|
||||
aggCountAndHaving
|
||||
|
@ -97,7 +104,9 @@ SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND CO
|
|||
|
||||
// MIN
|
||||
aggMinImplicit
|
||||
SELECT MIN(emp_no) m FROM "test_emp";
|
||||
// tag::min
|
||||
SELECT MIN(emp_no) AS min FROM test_emp;
|
||||
// end::min
|
||||
aggMinImplicitWithCast
|
||||
SELECT CAST(MIN(emp_no) AS SMALLINT) m FROM "test_emp";
|
||||
aggMin
|
||||
|
@ -133,7 +142,9 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN(
|
|||
|
||||
// MAX
|
||||
aggMaxImplicit
|
||||
SELECT MAX(emp_no) c FROM "test_emp";
|
||||
// tag::max
|
||||
SELECT MAX(salary) AS max FROM test_emp;
|
||||
// end::max
|
||||
aggMaxImplicitWithCast
|
||||
SELECT CAST(MAX(emp_no) AS SMALLINT) c FROM "test_emp";
|
||||
aggMax
|
||||
|
@ -203,6 +214,10 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY gender;
|
|||
// casting to an exact type - varchar, bigint, etc... will likely fail due to rounding error
|
||||
aggAvgWithCastToDouble
|
||||
SELECT gender g, CAST(AVG(emp_no) AS DOUBLE) a FROM "test_emp" GROUP BY gender;
|
||||
aggAvg
|
||||
// tag::avg
|
||||
SELECT AVG(salary) AS avg FROM test_emp;
|
||||
// end::avg
|
||||
aggAvgWithCastAndCount
|
||||
SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a, COUNT(1) c FROM "test_emp" GROUP BY gender;
|
||||
aggAvgWithCastAndCountWithFilter
|
||||
|
|
Loading…
Reference in New Issue