SQL: Introduce MAD (MedianAbsoluteDeviation) aggregation (#40048)
Add Median Absolute Deviation aggregation. Fixes #39597. (cherry picked from commit 4f09613942a9249d06c74da64ad7e6f362e97f56)
This commit is contained in:
parent
0d152a54f8
commit
3960374a6f
|
@ -407,6 +407,30 @@ https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution o
|
|||
include-tagged::{sql-specs}/docs.csv-spec[aggKurtosis]
|
||||
--------------------------------------------------
|
||||
|
||||
[[sql-functions-aggs-mad]]
|
||||
===== `MAD`
|
||||
|
||||
.Synopsis:
|
||||
[source, sql]
|
||||
--------------------------------------------------
|
||||
MAD(field_name<1>)
|
||||
--------------------------------------------------
|
||||
|
||||
*Input*:
|
||||
|
||||
<1> a numeric field
|
||||
|
||||
*Output*: `double` numeric value
|
||||
|
||||
.Description:
|
||||
|
||||
https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
|
||||
|
||||
["source","sql",subs="attributes,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{sql-specs}/docs.csv-spec[aggMad]
|
||||
--------------------------------------------------
|
||||
|
||||
[[sql-functions-aggs-percentile]]
|
||||
===== `PERCENTILE`
|
||||
|
||||
|
|
|
@ -469,3 +469,47 @@ null | 1 | 10
|
|||
F | 4 | 6
|
||||
M | 1 | 4
|
||||
;
|
||||
|
||||
medianAbsoluteDeviation
|
||||
schema::gender:s|mad:d
|
||||
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY gender;
|
||||
|
||||
gender | mad
|
||||
---------------+---------------
|
||||
null |10789.0
|
||||
F |12719.0
|
||||
M |8905.0
|
||||
;
|
||||
|
||||
medianAbsoluteDeviationOnTwoFields
|
||||
schema::gender:s|avg:l|mad_s:l|mad_l:d
|
||||
SELECT gender, FLOOR(AVG(salary)) AS avg, FLOOR(MAD(salary)) AS mad_s, MAD(languages) AS mad_l FROM test_emp GROUP BY gender ORDER BY gender;
|
||||
|
||||
gender | avg | mad_s | mad_l
|
||||
---------------+---------------+---------------+---------------
|
||||
null |48760 |10789 |2.0
|
||||
F |50490 |12719 |1.5
|
||||
M |46860 |8905 |1.0
|
||||
;
|
||||
|
||||
medianAbsoluteDeviationOnSecondaryFieldWithOrder
|
||||
schema::gender:s|mad:d
|
||||
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY mad ASC;
|
||||
|
||||
gender | mad
|
||||
---------------+---------------
|
||||
M |8905.0
|
||||
null |10789.0
|
||||
F |12719.0
|
||||
;
|
||||
|
||||
|
||||
medianAbsoluteDeviationOnSecondaryFieldWithOrderAndHaving
|
||||
schema::gender:s|mad:d
|
||||
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender HAVING mad > 10000 ORDER BY mad ASC;
|
||||
|
||||
gender | mad
|
||||
---------------+---------------
|
||||
null |10789.0
|
||||
F |12719.0
|
||||
;
|
||||
|
|
|
@ -17,6 +17,7 @@ MAX |AGGREGATE
|
|||
MIN |AGGREGATE
|
||||
SUM |AGGREGATE
|
||||
KURTOSIS |AGGREGATE
|
||||
MAD |AGGREGATE
|
||||
PERCENTILE |AGGREGATE
|
||||
PERCENTILE_RANK |AGGREGATE
|
||||
SKEWNESS |AGGREGATE
|
||||
|
|
|
@ -194,6 +194,7 @@ MAX |AGGREGATE
|
|||
MIN |AGGREGATE
|
||||
SUM |AGGREGATE
|
||||
KURTOSIS |AGGREGATE
|
||||
MAD |AGGREGATE
|
||||
PERCENTILE |AGGREGATE
|
||||
PERCENTILE_RANK |AGGREGATE
|
||||
SKEWNESS |AGGREGATE
|
||||
|
@ -1183,6 +1184,16 @@ SELECT MIN(salary) AS min, MAX(salary) AS max, KURTOSIS(salary) AS k FROM emp;
|
|||
// end::aggKurtosis
|
||||
;
|
||||
|
||||
aggMad
|
||||
// tag::aggMad
|
||||
SELECT MIN(salary) AS min, MAX(salary) AS max, AVG(salary) AS avg, MAD(salary) AS mad FROM emp;
|
||||
|
||||
min | max | avg | mad
|
||||
---------------+---------------+---------------+---------------
|
||||
25324 |74999 |48248.55 |10096.5
|
||||
// end::aggMad
|
||||
;
|
||||
|
||||
aggPercentile
|
||||
// tag::aggPercentile
|
||||
SELECT languages, PERCENTILE(salary, 95) AS "95th" FROM emp
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
|
|||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
|
||||
|
@ -153,13 +154,16 @@ public class FunctionRegistry {
|
|||
def(Min.class, Min::new, "MIN"),
|
||||
def(Sum.class, Sum::new, "SUM"));
|
||||
// Statistics
|
||||
addToMap(def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
|
||||
def(VarPop.class, VarPop::new,"VAR_POP"),
|
||||
addToMap(
|
||||
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"),
|
||||
def(MedianAbsoluteDeviation.class, MedianAbsoluteDeviation::new, "MAD"),
|
||||
def(Percentile.class, Percentile::new, "PERCENTILE"),
|
||||
def(PercentileRank.class, PercentileRank::new, "PERCENTILE_RANK"),
|
||||
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
|
||||
def(Skewness.class, Skewness::new, "SKEWNESS"),
|
||||
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"));
|
||||
def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
|
||||
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
|
||||
def(VarPop.class, VarPop::new,"VAR_POP")
|
||||
);
|
||||
// histogram
|
||||
addToMap(def(Histogram.class, Histogram::new, "HISTOGRAM"));
|
||||
// Scalar functions
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.sql.expression.function.aggregate;
|
||||
|
||||
import org.elasticsearch.xpack.sql.expression.Expression;
|
||||
import org.elasticsearch.xpack.sql.tree.NodeInfo;
|
||||
import org.elasticsearch.xpack.sql.tree.Source;
|
||||
import org.elasticsearch.xpack.sql.type.DataType;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Expression node for the median absolute deviation aggregate over a single
 * field expression; the function registry exposes it to SQL under the name
 * {@code MAD}.
 */
public class MedianAbsoluteDeviation extends NumericAggregate {
|
||||
|
||||
/**
 * Creates the aggregate, forwarding both arguments to the superclass constructor.
 *
 * @param source source information for this node
 * @param field  the expression the aggregate is computed over
 */
public MedianAbsoluteDeviation(Source source, Expression field) {
|
||||
super(source, field);
|
||||
}
|
||||
|
||||
/**
 * The result type is always {@link DataType#DOUBLE}; it does not depend on
 * the type of the input field.
 */
@Override
|
||||
public DataType dataType() {
|
||||
return DataType.DOUBLE;
|
||||
}
|
||||
|
||||
/**
 * Returns a new instance that keeps this node's source and uses the single
 * supplied child as its field.
 *
 * @param newChildren replacement children; must contain exactly one element
 * @throws IllegalArgumentException if {@code newChildren} does not contain exactly one element
 */
@Override
|
||||
public Expression replaceChildren(List<Expression> newChildren) {
|
||||
// This aggregate takes exactly one argument, so any other child count is rejected.
if (newChildren.size() != 1) {
|
||||
throw new IllegalArgumentException("expected [1] child but received [" + newChildren.size() + "]");
|
||||
}
|
||||
return new MedianAbsoluteDeviation(source(), newChildren.get(0));
|
||||
}
|
||||
|
||||
/**
 * Builds the {@link NodeInfo} for this node from the node itself, the
 * two-argument constructor reference and the current {@code field()}.
 */
@Override
|
||||
protected NodeInfo<? extends Expression> info() {
|
||||
return NodeInfo.create(this, MedianAbsoluteDeviation::new, field());
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
|
|||
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
|
||||
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
|
||||
|
@ -74,6 +75,7 @@ import org.elasticsearch.xpack.sql.querydsl.agg.GroupByValue;
|
|||
import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.MatrixStatsAgg;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.MaxAgg;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.MedianAbsoluteDeviationAgg;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.MinAgg;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.OrAggFilter;
|
||||
import org.elasticsearch.xpack.sql.querydsl.agg.PercentileRanksAgg;
|
||||
|
@ -144,7 +146,8 @@ final class QueryTranslator {
|
|||
new CountAggs(),
|
||||
new DateTimes(),
|
||||
new Firsts(),
|
||||
new Lasts()
|
||||
new Lasts(),
|
||||
new MADs()
|
||||
);
|
||||
|
||||
static class QueryTranslation {
|
||||
|
@ -833,6 +836,13 @@ final class QueryTranslator {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Translator that turns a {@link MedianAbsoluteDeviation} expression into a
 * {@link MedianAbsoluteDeviationAgg} leaf aggregation.
 */
static class MADs extends SingleValueAggTranslator<MedianAbsoluteDeviation> {
|
||||
/**
 * @param id identifier handed to the created aggregation
 * @param m  the median absolute deviation expression being translated
 * @return a {@link MedianAbsoluteDeviationAgg} built from {@code id} and {@code field(m)}
 */
@Override
|
||||
protected LeafAgg toAgg(String id, MedianAbsoluteDeviation m) {
|
||||
// NOTE(review): field(m) is defined outside this hunk; presumably it resolves the target field name of m — confirm.
return new MedianAbsoluteDeviationAgg(id, field(m));
|
||||
}
|
||||
}
|
||||
|
||||
static class Firsts extends TopHitsAggTranslator<First> {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.xpack.sql.querydsl.agg;
|
||||
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation;
|
||||
|
||||
/**
 * Leaf aggregation that produces a median absolute deviation aggregation
 * builder for a given id and field name.
 */
public class MedianAbsoluteDeviationAgg extends LeafAgg {
|
||||
|
||||
/**
 * Creates the aggregation, forwarding both arguments to the {@link LeafAgg} constructor.
 *
 * @param id        identifier of the aggregation
 * @param fieldName name of the field the aggregation runs on
 */
public MedianAbsoluteDeviationAgg(String id, String fieldName) {
|
||||
super(id, fieldName);
|
||||
}
|
||||
|
||||
/**
 * Creates the builder through the statically imported
 * {@code AggregationBuilders.medianAbsoluteDeviation}, named with {@code id()}
 * and targeting {@code fieldName()}.
 */
@Override
|
||||
AggregationBuilder toBuilder() {
|
||||
return medianAbsoluteDeviation(id()).field(fieldName());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue