SQL: HAVING clause should accept only aggregates (#31872)
Improve Verifier to allow HAVING clauses only on aggregates Close #31726
This commit is contained in:
parent
e955ffc38d
commit
6136e49a05
|
@ -213,10 +213,11 @@ abstract class Verifier {
|
|||
* Check validity of Aggregate/GroupBy.
|
||||
* This rule is needed for multiple reasons:
|
||||
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
|
||||
* 2. the order/having might contain a non-grouped attribute. This is typically
|
||||
* 2. the ORDER BY/HAVING might contain a non-grouped attribute. This is typically
|
||||
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
|
||||
* (because the expression gets resolved little by little without being pushed down,
|
||||
* without the Analyzer modifying anything.
|
||||
* 2a. HAVING also requires an Aggregate function
|
||||
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
|
||||
*/
|
||||
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
|
||||
|
@ -244,7 +245,7 @@ abstract class Verifier {
|
|||
}
|
||||
|
||||
// make sure to compare attributes directly
|
||||
if (Expressions.anyMatch(a.groupings(),
|
||||
if (Expressions.anyMatch(a.groupings(),
|
||||
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
|
||||
return;
|
||||
}
|
||||
|
@ -278,13 +279,14 @@ abstract class Verifier {
|
|||
|
||||
Map<Expression, Node<?>> missing = new LinkedHashMap<>();
|
||||
Expression condition = f.condition();
|
||||
condition.collectFirstChildren(c -> checkGroupMatch(c, condition, a.groupings(), missing, functions));
|
||||
// variation of checkGroupMatch customized for HAVING, which requires just aggregations
|
||||
condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, condition, missing, functions));
|
||||
|
||||
if (!missing.isEmpty()) {
|
||||
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;
|
||||
localFailures.add(fail(condition, "Cannot filter by non-grouped column" + plural + " %s, expected %s",
|
||||
Expressions.names(missing.keySet()),
|
||||
Expressions.names(a.groupings())));
|
||||
localFailures.add(
|
||||
fail(condition, "Cannot filter HAVING on non-aggregate" + plural + " %s; consider using WHERE instead",
|
||||
Expressions.names(missing.keySet())));
|
||||
groupingFailures.add(a);
|
||||
return false;
|
||||
}
|
||||
|
@ -294,6 +296,57 @@ abstract class Verifier {
|
|||
}
|
||||
|
||||
|
||||
private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> source,
|
||||
Map<Expression, Node<?>> missing, Map<String, Function> functions) {
|
||||
|
||||
// resolve FunctionAttribute to backing functions
|
||||
if (e instanceof FunctionAttribute) {
|
||||
FunctionAttribute fa = (FunctionAttribute) e;
|
||||
Function function = functions.get(fa.functionId());
|
||||
// TODO: this should be handled by a different rule
|
||||
if (function == null) {
|
||||
return false;
|
||||
}
|
||||
e = function;
|
||||
}
|
||||
|
||||
// scalar functions can be a binary tree
|
||||
// first test the function against the grouping
|
||||
// and if that fails, start unpacking hoping to find matches
|
||||
if (e instanceof ScalarFunction) {
|
||||
ScalarFunction sf = (ScalarFunction) e;
|
||||
|
||||
// unwrap function to find the base
|
||||
for (Expression arg : sf.arguments()) {
|
||||
arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, source, missing, functions));
|
||||
}
|
||||
return true;
|
||||
|
||||
} else if (e instanceof Score) {
|
||||
// Score can't be used for having
|
||||
missing.put(e, source);
|
||||
return true;
|
||||
}
|
||||
|
||||
// skip literals / foldable
|
||||
if (e.foldable()) {
|
||||
return true;
|
||||
}
|
||||
// skip aggs (allowed to refer to non-group columns)
|
||||
if (Functions.isAggregate(e)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// left without leaves which have to match; that's a failure since everything should be based on an agg
|
||||
if (e instanceof Attribute) {
|
||||
missing.put(e, source);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// check whether plain columns specified in an agg are mentioned in the group-by
|
||||
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures,
|
||||
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
|
||||
|
|
|
@ -159,4 +159,14 @@ public class VerifierErrorMessagesTests extends ESTestCase {
|
|||
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
|
||||
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
|
||||
}
|
||||
|
||||
public void testHavingOnColumn() {
|
||||
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
|
||||
verify("SELECT int FROM test GROUP BY int HAVING int > 2"));
|
||||
}
|
||||
|
||||
public void testHavingOnScalar() {
|
||||
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
|
||||
verify("SELECT int FROM test GROUP BY int HAVING 2 < ABS(int)"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue