SQL: Verify GROUP BY ordering on grouped columns (#30585)

Due to the way composite aggregation works, ordering in GROUP BY can be
applied only through grouped columns which now the analyzer verifier
enforces.

Fix 29900
This commit is contained in:
Costin Leau 2018-05-15 22:46:46 +03:00 committed by GitHub
parent 21b9170dec
commit 09329eb84f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 5 deletions

View File

@ -211,12 +211,13 @@ abstract class Verifier {
/**
* Check validity of Aggregate/GroupBy.
* This rule is needed for two reasons:
* This rule is needed for multiple reasons:
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
* 2. the order/having might contain a non-grouped attribute. This is typically
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
* (because the expression gets resolved little by little without being pushed down,
* without the Analyzer modifying anything.
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
@ -225,7 +226,7 @@ abstract class Verifier {
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
}
// check whether an orderBy failed
// check whether an orderBy failed or if it occurs on a non-key
private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailures,
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
if (p instanceof OrderBy) {
@ -234,7 +235,23 @@ abstract class Verifier {
Aggregate a = (Aggregate) o.child();
Map<Expression, Node<?>> missing = new LinkedHashMap<>();
o.order().forEach(oe -> oe.collectFirstChildren(c -> checkGroupMatch(c, oe, a.groupings(), missing, functions)));
o.order().forEach(oe -> {
Expression e = oe.child();
// cannot order by aggregates (not supported by composite)
if (Functions.isAggregate(e)) {
missing.put(e, oe);
return;
}
// make sure to compare attributes directly
if (Expressions.anyMatch(a.groupings(),
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
return;
}
// nothing matched, cannot group by it
missing.put(e, oe);
});
if (!missing.isEmpty()) {
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;

View File

@ -111,7 +111,7 @@ public class VerifierErrorMessagesTests extends ESTestCase {
}
public void testGroupByOrderByScalarOverNonGrouped() {
assertEquals("1:50: Cannot order by non-grouped column [date], expected [text]",
assertEquals("1:50: Cannot order by non-grouped column [YEAR(date [UTC])], expected [text]",
verify("SELECT MAX(int) FROM test GROUP BY text ORDER BY YEAR(date)"));
}
@ -144,4 +144,19 @@ public class VerifierErrorMessagesTests extends ESTestCase {
assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
verify("SELECT unsupported FROM test"));
}
}
public void testGroupByOrderByNonKey() {
assertEquals("1:52: Cannot order by non-grouped column [a], expected [bool]",
verify("SELECT AVG(int) a FROM test GROUP BY bool ORDER BY a"));
}
public void testGroupByOrderByFunctionOverKey() {
assertEquals("1:44: Cannot order by non-grouped column [MAX(int)], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY MAX(int)"));
}
public void testGroupByOrderByScore() {
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
}
}

View File

@ -49,4 +49,18 @@ public class VerifierErrorMessagesTests extends ESTestCase {
assertEquals("1:32: Currently, only a single expression can be used with GROUP BY; please select one of [bool, keyword]",
verify("SELECT bool FROM test GROUP BY bool, keyword"));
}
//
// TODO potential improvements
//
// regarding resolution
// public void testGroupByOrderByKeyAlias() {
// assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
// verify("SELECT int i FROM test GROUP BY int ORDER BY i"));
// }
//
// public void testGroupByAlias() {
// assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
// verify("SELECT int i FROM test GROUP BY i ORDER BY int"));
// }
}