remove calcite AggregateRemoveRule to fix nested group by query with order by in outer query (#15237)

* Fixing nested group by query with order by in outer query

* Adding examples
This commit is contained in:
Soumyava 2023-10-24 15:30:13 -07:00 committed by GitHub
parent 4149c9422c
commit 06f40a0019
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 236 additions and 60 deletions

View File

@ -177,7 +177,11 @@ public class CalciteRulesManager
private static final List<RelOptRule> ABSTRACT_RELATIONAL_RULES = private static final List<RelOptRule> ABSTRACT_RELATIONAL_RULES =
ImmutableList.of( ImmutableList.of(
AbstractConverter.ExpandConversionRule.INSTANCE, AbstractConverter.ExpandConversionRule.INSTANCE,
CoreRules.AGGREGATE_REMOVE, // Removing CoreRules.AGGREGATE_REMOVE rule here
// as after the Calcite upgrade, it would plan queries to a scan over a group by
// with ordering on a non-time column
// which is not allowed in Druid. We should add that rule back
// once Druid starts to support non-time ordering over scan queries
CoreRules.UNION_TO_DISTINCT, CoreRules.UNION_TO_DISTINCT,
CoreRules.PROJECT_REMOVE, CoreRules.PROJECT_REMOVE,
CoreRules.AGGREGATE_JOIN_TRANSPOSE, CoreRules.AGGREGATE_JOIN_TRANSPOSE,

View File

@ -50,7 +50,6 @@ import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.post.ExpressionPostAggregator;
import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.query.extraction.SubstringDimExtractionFn;
@ -3172,45 +3171,55 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
public void testArrayAggGroupByArrayAggFromSubquery() public void testArrayAggGroupByArrayAggFromSubquery()
{ {
cannotVectorize(); cannotVectorize();
skipVectorize();
testQuery( testQuery(
"SELECT dim2, arr, COUNT(*) FROM (SELECT dim2, ARRAY_AGG(DISTINCT dim1) as arr FROM foo WHERE dim1 is not null GROUP BY 1 LIMIT 5) GROUP BY 1,2", "SELECT dim2, arr, COUNT(*) FROM (SELECT dim2, ARRAY_AGG(DISTINCT dim1) as arr FROM foo WHERE dim1 is not null GROUP BY 1 LIMIT 5) GROUP BY 1,2",
QUERY_CONTEXT_NO_STRINGIFY_ARRAY, QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
ImmutableList.of( ImmutableList.of(
new TopNQueryBuilder() GroupByQuery.builder()
.dataSource(CalciteTests.DATASOURCE1) .setDataSource(new TopNQueryBuilder()
.dimension(new DefaultDimensionSpec( .dataSource(CalciteTests.DATASOURCE1)
"dim2", .dimension(new DefaultDimensionSpec(
"d0", "dim2",
ColumnType.STRING "d0",
)) ColumnType.STRING
.metric(new DimensionTopNMetricSpec( ))
null, .metric(new DimensionTopNMetricSpec(
StringComparators.LEXICOGRAPHIC null,
)) StringComparators.LEXICOGRAPHIC
.filters(notNull("dim1")) ))
.threshold(5) .filters(notNull("dim1"))
.aggregators(new ExpressionLambdaAggregatorFactory( .threshold(5)
"a0", .aggregators(new ExpressionLambdaAggregatorFactory(
ImmutableSet.of("dim1"), "a0",
"__acc", ImmutableSet.of("dim1"),
"ARRAY<STRING>[]", "__acc",
"ARRAY<STRING>[]", "ARRAY<STRING>[]",
true, "ARRAY<STRING>[]",
true, true,
false, true,
"array_set_add(\"__acc\", \"dim1\")", false,
"array_set_add_all(\"__acc\", \"a0\")", "array_set_add(\"__acc\", \"dim1\")",
null, "array_set_add_all(\"__acc\", \"a0\")",
null, null,
new HumanReadableBytes(1024), null,
ExprMacroTable.nil() new HumanReadableBytes(1024),
)) ExprMacroTable.nil()
.intervals(querySegmentSpec(Filtration.eternity())) ))
.granularity(Granularities.ALL) .intervals(querySegmentSpec(Filtration.eternity()))
.context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) .granularity(Granularities.ALL)
.postAggregators(new ExpressionPostAggregator("s0", "1", null, ExprMacroTable.nil())) .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
.build() .build()
)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
new DefaultDimensionSpec("d0", "_d0", ColumnType.STRING),
new DefaultDimensionSpec("a0", "_d1", ColumnType.STRING_ARRAY)
)
.setAggregatorSpecs(new CountAggregatorFactory("_a0"))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
), ),
useDefault ? useDefault ?
ImmutableList.of( ImmutableList.of(

View File

@ -5381,6 +5381,8 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest
@Test @Test
public void testPlanWithInFilterMoreThanInSubQueryThreshold() public void testPlanWithInFilterMoreThanInSubQueryThreshold()
{ {
skipVectorize();
cannotVectorize();
String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845, 14905, 4853, 29064)"; String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845, 14905, 4853, 29064)";
Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
@ -5397,21 +5399,32 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest
.dataSource( .dataSource(
JoinDataSource.create( JoinDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3), new TableDataSource(CalciteTests.DATASOURCE3),
InlineDataSource.fromIterable( new QueryDataSource(
ImmutableList.of( GroupByQuery.builder()
new Object[]{4842L}, .setDataSource(InlineDataSource.fromIterable(
new Object[]{4844L}, ImmutableList.of(
new Object[]{4845L}, new Object[]{4842L},
new Object[]{14905L}, new Object[]{4844L},
new Object[]{4853L}, new Object[]{4845L},
new Object[]{29064L} new Object[]{14905L},
), new Object[]{4853L},
RowSignature.builder() new Object[]{29064L}
.add("ROW_VALUE", ColumnType.LONG) ),
RowSignature.builder()
.add("ROW_VALUE", ColumnType.LONG)
.build()
)
)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setDimensions(
new DefaultDimensionSpec("ROW_VALUE", "d0", ColumnType.LONG)
)
.setGranularity(Granularities.ALL)
.setLimitSpec(NoopLimitSpec.instance())
.build() .build()
), ),
"j0.", "j0.",
"(\"l1\" == \"j0.ROW_VALUE\")", "(\"l1\" == \"j0.d0\")",
JoinType.INNER, JoinType.INNER,
null, null,
ExprMacroTable.nil(), ExprMacroTable.nil(),

View File

@ -13949,15 +13949,32 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
+ "group by 1", + "group by 1",
ImmutableList.of( ImmutableList.of(
GroupByQuery.builder() GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3) .setDataSource(GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Intervals.ETERNITY))
.setGranularity(Granularities.ALL)
.addDimension(new DefaultDimensionSpec(
"dim1",
"_d0",
ColumnType.STRING
))
.addAggregator(new LongSumAggregatorFactory("a0", "l1"))
.build()
)
.setInterval(querySegmentSpec(Intervals.ETERNITY)) .setInterval(querySegmentSpec(Intervals.ETERNITY))
.setGranularity(Granularities.ALL) .setDimensions(new DefaultDimensionSpec("_d0", "d0", ColumnType.STRING))
.addDimension(new DefaultDimensionSpec("dim1", "_d0", ColumnType.STRING)) .setAggregatorSpecs(aggregators(
.addAggregator(new LongSumAggregatorFactory("a0", "l1")) new FilteredAggregatorFactory(
.setPostAggregatorSpecs(ImmutableList.of( new CountAggregatorFactory("_a0"),
expressionPostAgg("p0", "case_searched((\"a0\" == 0),1,0)") useDefault ?
selector("a0", "0") :
equality("a0", 0, ColumnType.LONG)
)
)) ))
.setGranularity(Granularities.ALL)
.setContext(QUERY_CONTEXT_DEFAULT)
.build() .build()
), ),
useDefault ? ImmutableList.of( useDefault ? ImmutableList.of(
new Object[]{"", 0L}, new Object[]{"", 0L},
@ -14275,4 +14292,138 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
.run()); .run());
assertThat(e, invalidSqlIs("ASCENDING ordering with NULLS LAST is not supported! (line [1], column [41])")); assertThat(e, invalidSqlIs("ASCENDING ordering with NULLS LAST is not supported! (line [1], column [41])"));
} }
@Test
public void testInGroupByLimitOutGroupByOrderBy()
{
skipVectorize();
cannotVectorize();
testQuery(
"with t AS (SELECT m2, COUNT(m1) as trend_score\n"
+ "FROM \"foo\"\n"
+ "GROUP BY 1 \n"
+ "LIMIT 10\n"
+ ")\n"
+ "select m2, (MAX(trend_score)) from t\n"
+ "where m2 > 2\n"
+ "GROUP BY 1 \n"
+ "ORDER BY 2 DESC",
QUERY_CONTEXT_DEFAULT,
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE))
.threshold(10)
.aggregators(aggregators(
useDefault
? new CountAggregatorFactory("a0")
: new FilteredAggregatorFactory(
new CountAggregatorFactory("a0"),
notNull("m1")
)
))
.metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
.context(OUTER_LIMIT_CONTEXT)
.build()
)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
)
.setDimFilter(
useDefault ?
bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) :
new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null)
)
.setAggregatorSpecs(aggregators(
new LongMaxAggregatorFactory("_a0", "a0")
))
.setLimitSpec(
DefaultLimitSpec
.builder()
.orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC))
.build()
)
.setContext(OUTER_LIMIT_CONTEXT)
.build()
),
ImmutableList.of(
new Object[]{3.0D, 1L},
new Object[]{4.0D, 1L},
new Object[]{5.0D, 1L},
new Object[]{6.0D, 1L}
)
);
}
@Test
public void testInGroupByOrderByLimitOutGroupByOrderByLimit()
{
skipVectorize();
cannotVectorize();
testQuery(
"with t AS (SELECT m2 as mo, COUNT(m1) as trend_score\n"
+ "FROM \"foo\"\n"
+ "GROUP BY 1\n"
+ "ORDER BY trend_score DESC\n"
+ "LIMIT 10)\n"
+ "select mo, (MAX(trend_score)) from t\n"
+ "where mo > 2\n"
+ "GROUP BY 1 \n"
+ "ORDER BY 2 DESC LIMIT 2\n",
QUERY_CONTEXT_DEFAULT,
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE))
.threshold(10)
.aggregators(aggregators(
useDefault
? new CountAggregatorFactory("a0")
: new FilteredAggregatorFactory(
new CountAggregatorFactory("a0"),
notNull("m1")
)
))
.metric(new NumericTopNMetricSpec("a0"))
.context(OUTER_LIMIT_CONTEXT)
.build()
)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
)
.setDimFilter(
useDefault ?
bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) :
new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null)
)
.setAggregatorSpecs(aggregators(
new LongMaxAggregatorFactory("_a0", "a0")
))
.setLimitSpec(
DefaultLimitSpec
.builder()
.orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC))
.limit(2)
.build()
)
.setContext(OUTER_LIMIT_CONTEXT)
.build()
),
ImmutableList.of(
new Object[]{3.0D, 1L},
new Object[]{4.0D, 1L}
)
);
}
} }

View File

@ -4364,7 +4364,6 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest
windowQueryTest(); windowQueryTest();
} }
@NotYetSupported(Modes.CANNOT_APPLY_VIRTUAL_COL)
@DrillTest("nestedAggs/multiWin_5") @DrillTest("nestedAggs/multiWin_5")
@Test @Test
public void test_nestedAggs_multiWin_5() public void test_nestedAggs_multiWin_5()

View File

@ -8,15 +8,15 @@ false 280487665.00748299428571428571
false 295757331.64717262000000000000 false 295757331.64717262000000000000
false 302662813.16785714333333333333 false 302662813.16785714333333333333
false 304608131.82107142900000000000 false 304608131.82107142900000000000
false 303537640.51502361272727272727 false 303537640.515023651272727272727
true 4.0000000000000000 true 4.0000000000000000
true 4.5000000000000000 true 4.5000000000000000
true 5.2222222222222222 true 5.2222222222222222
true 4101.1041666666666667 true 4101.1041666666666667
true 5907.4433333333333333 true 5907.4433333333333333
true 284524.6472222222222778 true 284524.6472222222222778
true 449300.4527210884353810 true 449300.4527210885000000
true 550414.3805059523809583 true 550414.3805059523809583
true 613525.0542768959435185 true 613525.0542768961000000
true 652829.5088492063491667 true 652829.5088492063491667
true 676669.0989538239537879 true 676669.0989538240000000