SQL: Introduce MAD (MedianAbsoluteDeviation) aggregation (#40048)
Add Median Absolute Deviation aggregation. Fix #39597. (cherry picked from commit 4f09613942a9249d06c74da64ad7e6f362e97f56)
This commit is contained in:
parent
0d152a54f8
commit
3960374a6f
|
@ -407,6 +407,30 @@ https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution o
|
||||||
include-tagged::{sql-specs}/docs.csv-spec[aggKurtosis]
|
include-tagged::{sql-specs}/docs.csv-spec[aggKurtosis]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
[[sql-functions-aggs-mad]]
|
||||||
|
===== `MAD`
|
||||||
|
|
||||||
|
.Synopsis:
|
||||||
|
[source, sql]
|
||||||
|
--------------------------------------------------
|
||||||
|
MAD(field_name<1>)
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
*Input*:
|
||||||
|
|
||||||
|
<1> a numeric field
|
||||||
|
|
||||||
|
*Output*: `double` numeric value
|
||||||
|
|
||||||
|
.Description:
|
||||||
|
|
||||||
|
https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
|
||||||
|
|
||||||
|
["source","sql",subs="attributes,macros"]
|
||||||
|
--------------------------------------------------
|
||||||
|
include-tagged::{sql-specs}/docs.csv-spec[aggMad]
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
[[sql-functions-aggs-percentile]]
|
[[sql-functions-aggs-percentile]]
|
||||||
===== `PERCENTILE`
|
===== `PERCENTILE`
|
||||||
|
|
||||||
|
|
|
@ -469,3 +469,47 @@ null | 1 | 10
|
||||||
F | 4 | 6
|
F | 4 | 6
|
||||||
M | 1 | 4
|
M | 1 | 4
|
||||||
;
|
;
|
||||||
|
|
||||||
|
medianAbsoluteDeviation
|
||||||
|
schema::gender:s|mad:d
|
||||||
|
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY gender;
|
||||||
|
|
||||||
|
gender | mad
|
||||||
|
---------------+---------------
|
||||||
|
null |10789.0
|
||||||
|
F |12719.0
|
||||||
|
M |8905.0
|
||||||
|
;
|
||||||
|
|
||||||
|
medianAbsoluteDeviationOnTwoFields
|
||||||
|
schema::gender:s|avg:l|mad_s:l|mad_l:d
|
||||||
|
SELECT gender, FLOOR(AVG(salary)) AS avg, FLOOR(MAD(salary)) AS mad_s, MAD(languages) AS mad_l FROM test_emp GROUP BY gender ORDER BY gender;
|
||||||
|
|
||||||
|
gender | avg | mad_s | mad_l
|
||||||
|
---------------+---------------+---------------+---------------
|
||||||
|
null |48760 |10789 |2.0
|
||||||
|
F |50490 |12719 |1.5
|
||||||
|
M |46860 |8905 |1.0
|
||||||
|
;
|
||||||
|
|
||||||
|
medianAbsoluteDeviationOnSecondaryFieldWithOrder
|
||||||
|
schema::gender:s|mad:d
|
||||||
|
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY mad ASC;
|
||||||
|
|
||||||
|
gender | mad
|
||||||
|
---------------+---------------
|
||||||
|
M |8905.0
|
||||||
|
null |10789.0
|
||||||
|
F |12719.0
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
medianAbsoluteDeviationOnSecondaryFieldWithOrderAndHaving
|
||||||
|
schema::gender:s|mad:d
|
||||||
|
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender HAVING mad > 10000 ORDER BY mad ASC;
|
||||||
|
|
||||||
|
gender | mad
|
||||||
|
---------------+---------------
|
||||||
|
null |10789.0
|
||||||
|
F |12719.0
|
||||||
|
;
|
||||||
|
|
|
@ -17,6 +17,7 @@ MAX |AGGREGATE
|
||||||
MIN |AGGREGATE
|
MIN |AGGREGATE
|
||||||
SUM |AGGREGATE
|
SUM |AGGREGATE
|
||||||
KURTOSIS |AGGREGATE
|
KURTOSIS |AGGREGATE
|
||||||
|
MAD |AGGREGATE
|
||||||
PERCENTILE |AGGREGATE
|
PERCENTILE |AGGREGATE
|
||||||
PERCENTILE_RANK |AGGREGATE
|
PERCENTILE_RANK |AGGREGATE
|
||||||
SKEWNESS |AGGREGATE
|
SKEWNESS |AGGREGATE
|
||||||
|
|
|
@ -194,6 +194,7 @@ MAX |AGGREGATE
|
||||||
MIN |AGGREGATE
|
MIN |AGGREGATE
|
||||||
SUM |AGGREGATE
|
SUM |AGGREGATE
|
||||||
KURTOSIS |AGGREGATE
|
KURTOSIS |AGGREGATE
|
||||||
|
MAD |AGGREGATE
|
||||||
PERCENTILE |AGGREGATE
|
PERCENTILE |AGGREGATE
|
||||||
PERCENTILE_RANK |AGGREGATE
|
PERCENTILE_RANK |AGGREGATE
|
||||||
SKEWNESS |AGGREGATE
|
SKEWNESS |AGGREGATE
|
||||||
|
@ -1183,6 +1184,16 @@ SELECT MIN(salary) AS min, MAX(salary) AS max, KURTOSIS(salary) AS k FROM emp;
|
||||||
// end::aggKurtosis
|
// end::aggKurtosis
|
||||||
;
|
;
|
||||||
|
|
||||||
|
aggMad
|
||||||
|
// tag::aggMad
|
||||||
|
SELECT MIN(salary) AS min, MAX(salary) AS max, AVG(salary) AS avg, MAD(salary) AS mad FROM emp;
|
||||||
|
|
||||||
|
min | max | avg | mad
|
||||||
|
---------------+---------------+---------------+---------------
|
||||||
|
25324 |74999 |48248.55 |10096.5
|
||||||
|
// end::aggMad
|
||||||
|
;
|
||||||
|
|
||||||
aggPercentile
|
aggPercentile
|
||||||
// tag::aggPercentile
|
// tag::aggPercentile
|
||||||
SELECT languages, PERCENTILE(salary, 95) AS "95th" FROM emp
|
SELECT languages, PERCENTILE(salary, 95) AS "95th" FROM emp
|
||||||
|
|
|
@ -14,6 +14,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
||||||
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
|
||||||
|
@ -153,13 +154,16 @@ public class FunctionRegistry {
|
||||||
def(Min.class, Min::new, "MIN"),
|
def(Min.class, Min::new, "MIN"),
|
||||||
def(Sum.class, Sum::new, "SUM"));
|
def(Sum.class, Sum::new, "SUM"));
|
||||||
// Statistics
|
// Statistics
|
||||||
addToMap(def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
|
addToMap(
|
||||||
def(VarPop.class, VarPop::new,"VAR_POP"),
|
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"),
|
||||||
|
def(MedianAbsoluteDeviation.class, MedianAbsoluteDeviation::new, "MAD"),
|
||||||
def(Percentile.class, Percentile::new, "PERCENTILE"),
|
def(Percentile.class, Percentile::new, "PERCENTILE"),
|
||||||
def(PercentileRank.class, PercentileRank::new, "PERCENTILE_RANK"),
|
def(PercentileRank.class, PercentileRank::new, "PERCENTILE_RANK"),
|
||||||
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
|
|
||||||
def(Skewness.class, Skewness::new, "SKEWNESS"),
|
def(Skewness.class, Skewness::new, "SKEWNESS"),
|
||||||
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"));
|
def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
|
||||||
|
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
|
||||||
|
def(VarPop.class, VarPop::new,"VAR_POP")
|
||||||
|
);
|
||||||
// histogram
|
// histogram
|
||||||
addToMap(def(Histogram.class, Histogram::new, "HISTOGRAM"));
|
addToMap(def(Histogram.class, Histogram::new, "HISTOGRAM"));
|
||||||
// Scalar functions
|
// Scalar functions
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.xpack.sql.expression.function.aggregate;
|
||||||
|
|
||||||
|
import org.elasticsearch.xpack.sql.expression.Expression;
|
||||||
|
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||||
|
import org.elasticsearch.xpack.sql.tree.Source;
|
||||||
|
import org.elasticsearch.xpack.sql.type.DataType;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class MedianAbsoluteDeviation extends NumericAggregate {
|
||||||
|
|
||||||
|
public MedianAbsoluteDeviation(Source source, Expression field) {
|
||||||
|
super(source, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataType dataType() {
|
||||||
|
return DataType.DOUBLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Expression replaceChildren(List<Expression> newChildren) {
|
||||||
|
if (newChildren.size() != 1) {
|
||||||
|
throw new IllegalArgumentException("expected [1] child but received [" + newChildren.size() + "]");
|
||||||
|
}
|
||||||
|
return new MedianAbsoluteDeviation(source(), newChildren.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected NodeInfo<? extends Expression> info() {
|
||||||
|
return NodeInfo.create(this, MedianAbsoluteDeviation::new, field());
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
||||||
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
||||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
||||||
|
@ -74,6 +75,7 @@ import org.elasticsearch.xpack.sql.querydsl.agg.GroupByValue;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg;
|
import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.MatrixStatsAgg;
|
import org.elasticsearch.xpack.sql.querydsl.agg.MatrixStatsAgg;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.MaxAgg;
|
import org.elasticsearch.xpack.sql.querydsl.agg.MaxAgg;
|
||||||
|
import org.elasticsearch.xpack.sql.querydsl.agg.MedianAbsoluteDeviationAgg;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.MinAgg;
|
import org.elasticsearch.xpack.sql.querydsl.agg.MinAgg;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.OrAggFilter;
|
import org.elasticsearch.xpack.sql.querydsl.agg.OrAggFilter;
|
||||||
import org.elasticsearch.xpack.sql.querydsl.agg.PercentileRanksAgg;
|
import org.elasticsearch.xpack.sql.querydsl.agg.PercentileRanksAgg;
|
||||||
|
@ -144,7 +146,8 @@ final class QueryTranslator {
|
||||||
new CountAggs(),
|
new CountAggs(),
|
||||||
new DateTimes(),
|
new DateTimes(),
|
||||||
new Firsts(),
|
new Firsts(),
|
||||||
new Lasts()
|
new Lasts(),
|
||||||
|
new MADs()
|
||||||
);
|
);
|
||||||
|
|
||||||
static class QueryTranslation {
|
static class QueryTranslation {
|
||||||
|
@ -833,6 +836,13 @@ final class QueryTranslator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static class MADs extends SingleValueAggTranslator<MedianAbsoluteDeviation> {
|
||||||
|
@Override
|
||||||
|
protected LeafAgg toAgg(String id, MedianAbsoluteDeviation m) {
|
||||||
|
return new MedianAbsoluteDeviationAgg(id, field(m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static class Firsts extends TopHitsAggTranslator<First> {
|
static class Firsts extends TopHitsAggTranslator<First> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.xpack.sql.querydsl.agg;
|
||||||
|
|
||||||
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||||
|
|
||||||
|
import static org.elasticsearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation;
|
||||||
|
|
||||||
|
public class MedianAbsoluteDeviationAgg extends LeafAgg {
|
||||||
|
|
||||||
|
public MedianAbsoluteDeviationAgg(String id, String fieldName) {
|
||||||
|
super(id, fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
AggregationBuilder toBuilder() {
|
||||||
|
return medianAbsoluteDeviation(id()).field(fieldName());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue