SQL: Introduce MAD (MedianAbsoluteDeviation) aggregation (#40048)

Add Median Absolute Deviation aggregation

Fix #39597

(cherry picked from commit 4f09613942a9249d06c74da64ad7e6f362e97f56)
This commit is contained in:
Costin Leau 2019-03-15 11:45:10 +02:00 committed by Costin Leau
parent 0d152a54f8
commit 3960374a6f
8 changed files with 163 additions and 7 deletions

View File

@ -407,6 +407,30 @@ https://en.wikipedia.org/wiki/Kurtosis[Quantify] the shape of the distribution o
include-tagged::{sql-specs}/docs.csv-spec[aggKurtosis]
--------------------------------------------------
[[sql-functions-aggs-mad]]
===== `MAD`
.Synopsis:
[source, sql]
--------------------------------------------------
MAD(field_name<1>)
--------------------------------------------------
*Input*:
<1> a numeric field
*Output*: `double` numeric value
.Description:
https://en.wikipedia.org/wiki/Median_absolute_deviation[Measure] the variability of the input values in the field `field_name`.
["source","sql",subs="attributes,macros"]
--------------------------------------------------
include-tagged::{sql-specs}/docs.csv-spec[aggMad]
--------------------------------------------------
[[sql-functions-aggs-percentile]]
===== `PERCENTILE`

View File

@ -469,3 +469,47 @@ null | 1 | 10
F | 4 | 6
M | 1 | 4
;
medianAbsoluteDeviation
schema::gender:s|mad:d
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY gender;
gender | mad
---------------+---------------
null |10789.0
F |12719.0
M |8905.0
;
medianAbsoluteDeviationOnTwoFields
schema::gender:s|avg:l|mad_s:l|mad_l:d
SELECT gender, FLOOR(AVG(salary)) AS avg, FLOOR(MAD(salary)) AS mad_s, MAD(languages) AS mad_l FROM test_emp GROUP BY gender ORDER BY gender;
gender | avg | mad_s | mad_l
---------------+---------------+---------------+---------------
null |48760 |10789 |2.0
F |50490 |12719 |1.5
M |46860 |8905 |1.0
;
medianAbsoluteDeviationOnSecondaryFieldWithOrder
schema::gender:s|mad:d
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender ORDER BY mad ASC;
gender | mad
---------------+---------------
M |8905.0
null |10789.0
F |12719.0
;
medianAbsoluteDeviationOnSecondaryFieldWithOrderAndHaving
schema::gender:s|mad:d
SELECT gender, MAD(salary) AS mad FROM test_emp GROUP BY gender HAVING mad > 10000 ORDER BY mad ASC;
gender | mad
---------------+---------------
null |10789.0
F |12719.0
;

View File

@ -17,6 +17,7 @@ MAX |AGGREGATE
MIN |AGGREGATE
SUM |AGGREGATE
KURTOSIS |AGGREGATE
MAD |AGGREGATE
PERCENTILE |AGGREGATE
PERCENTILE_RANK |AGGREGATE
SKEWNESS |AGGREGATE

View File

@ -194,6 +194,7 @@ MAX |AGGREGATE
MIN |AGGREGATE
SUM |AGGREGATE
KURTOSIS |AGGREGATE
MAD |AGGREGATE
PERCENTILE |AGGREGATE
PERCENTILE_RANK |AGGREGATE
SKEWNESS |AGGREGATE
@ -1183,6 +1184,16 @@ SELECT MIN(salary) AS min, MAX(salary) AS max, KURTOSIS(salary) AS k FROM emp;
// end::aggKurtosis
;
aggMad
// tag::aggMad
SELECT MIN(salary) AS min, MAX(salary) AS max, AVG(salary) AS avg, MAD(salary) AS mad FROM emp;
min | max | avg | mad
---------------+---------------+---------------+---------------
25324 |74999 |48248.55 |10096.5
// end::aggMad
;
aggPercentile
// tag::aggPercentile
SELECT languages, PERCENTILE(salary, 95) AS "95th" FROM emp

View File

@ -14,6 +14,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
@ -153,13 +154,16 @@ public class FunctionRegistry {
def(Min.class, Min::new, "MIN"),
def(Sum.class, Sum::new, "SUM"));
// Statistics
addToMap(def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
def(VarPop.class, VarPop::new,"VAR_POP"),
addToMap(
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"),
def(MedianAbsoluteDeviation.class, MedianAbsoluteDeviation::new, "MAD"),
def(Percentile.class, Percentile::new, "PERCENTILE"),
def(PercentileRank.class, PercentileRank::new, "PERCENTILE_RANK"),
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
def(Skewness.class, Skewness::new, "SKEWNESS"),
def(Kurtosis.class, Kurtosis::new, "KURTOSIS"));
def(StddevPop.class, StddevPop::new, "STDDEV_POP"),
def(SumOfSquares.class, SumOfSquares::new, "SUM_OF_SQUARES"),
def(VarPop.class, VarPop::new,"VAR_POP")
);
// histogram
addToMap(def(Histogram.class, Histogram::new, "HISTOGRAM"));
// Scalar functions

View File

@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression.function.aggregate;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;
import java.util.List;
/**
 * Aggregate function node representing the median absolute deviation of a
 * single field expression. The declared result type is always
 * {@link DataType#DOUBLE}.
 */
public class MedianAbsoluteDeviation extends NumericAggregate {

    /**
     * @param source location of this function in the original query
     * @param field  expression the aggregation is computed over
     */
    public MedianAbsoluteDeviation(Source source, Expression field) {
        super(source, field);
    }

    /** The result is reported as a double, independent of the input expression. */
    @Override
    public DataType dataType() {
        return DataType.DOUBLE;
    }

    /**
     * Rebuilds this node around a replacement child, keeping the same source.
     *
     * @param newChildren the replacement children; must contain exactly one element
     * @return a new {@code MedianAbsoluteDeviation} over the single new child
     * @throws IllegalArgumentException if the list does not hold exactly one child
     */
    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        final int childCount = newChildren.size();
        if (childCount == 1) {
            return new MedianAbsoluteDeviation(source(), newChildren.get(0));
        }
        throw new IllegalArgumentException("expected [1] child but received [" + childCount + "]");
    }

    /** Describes how to re-create this node from its single {@code field()} property. */
    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, MedianAbsoluteDeviation::new, field());
    }
}

View File

@ -24,6 +24,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Last;
import org.elasticsearch.xpack.sql.expression.function.aggregate.MedianAbsoluteDeviation;
import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
@ -74,6 +75,7 @@ import org.elasticsearch.xpack.sql.querydsl.agg.GroupByValue;
import org.elasticsearch.xpack.sql.querydsl.agg.LeafAgg;
import org.elasticsearch.xpack.sql.querydsl.agg.MatrixStatsAgg;
import org.elasticsearch.xpack.sql.querydsl.agg.MaxAgg;
import org.elasticsearch.xpack.sql.querydsl.agg.MedianAbsoluteDeviationAgg;
import org.elasticsearch.xpack.sql.querydsl.agg.MinAgg;
import org.elasticsearch.xpack.sql.querydsl.agg.OrAggFilter;
import org.elasticsearch.xpack.sql.querydsl.agg.PercentileRanksAgg;
@ -144,7 +146,8 @@ final class QueryTranslator {
new CountAggs(),
new DateTimes(),
new Firsts(),
new Lasts()
new Lasts(),
new MADs()
);
static class QueryTranslation {
@ -833,6 +836,13 @@ final class QueryTranslator {
}
}
/**
 * Translates a {@link MedianAbsoluteDeviation} function into the
 * {@link MedianAbsoluteDeviationAgg} leaf aggregation that carries it.
 */
static class MADs extends SingleValueAggTranslator<MedianAbsoluteDeviation> {

    @Override
    protected LeafAgg toAgg(String id, MedianAbsoluteDeviation m) {
        // Resolve the name of the field the function operates on, then wrap it
        // in the dedicated aggregation under the supplied id.
        final String fieldName = field(m);
        return new MedianAbsoluteDeviationAgg(id, fieldName);
    }
}
static class Firsts extends TopHitsAggTranslator<First> {
@Override

View File

@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.querydsl.agg;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation;
/**
 * Leaf aggregation that builds a median-absolute-deviation aggregation over a
 * single field.
 */
public class MedianAbsoluteDeviationAgg extends LeafAgg {

    /**
     * @param id        identifier used as the name of the generated aggregation
     * @param fieldName field the aggregation is computed on
     */
    public MedianAbsoluteDeviationAgg(String id, String fieldName) {
        super(id, fieldName);
    }

    /** Creates the builder named after {@code id()} and pointed at {@code fieldName()}. */
    @Override
    AggregationBuilder toBuilder() {
        final String aggName = id();
        final String targetField = fieldName();
        return medianAbsoluteDeviation(aggName).field(targetField);
    }
}