mirror of https://github.com/apache/druid.git
remove calcite AggregateRemoveRule to fix nested group by query with order by in outer query (#15237)
* Fixing nested group by query with order by in outer query * Adding examples
This commit is contained in:
parent
4149c9422c
commit
06f40a0019
|
@ -177,7 +177,11 @@ public class CalciteRulesManager
|
|||
private static final List<RelOptRule> ABSTRACT_RELATIONAL_RULES =
|
||||
ImmutableList.of(
|
||||
AbstractConverter.ExpandConversionRule.INSTANCE,
|
||||
CoreRules.AGGREGATE_REMOVE,
|
||||
// Removing CoreRules.AGGREGATE_REMOVE rule here
|
||||
// as after the Calcite upgrade, it would plan queries to a scan over a group by
|
||||
// with ordering on a non-time column
|
||||
// which is not allowed in Druid. We should add that rule back
|
||||
// once Druid starts to support non-time ordering over scan queries
|
||||
CoreRules.UNION_TO_DISTINCT,
|
||||
CoreRules.PROJECT_REMOVE,
|
||||
CoreRules.AGGREGATE_JOIN_TRANSPOSE,
|
||||
|
|
|
@ -50,7 +50,6 @@ import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
|
|||
import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.post.ExpressionPostAggregator;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
import org.apache.druid.query.expression.TestExprMacroTable;
|
||||
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
|
||||
|
@ -3172,45 +3171,55 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
|||
public void testArrayAggGroupByArrayAggFromSubquery()
|
||||
{
|
||||
cannotVectorize();
|
||||
|
||||
skipVectorize();
|
||||
testQuery(
|
||||
"SELECT dim2, arr, COUNT(*) FROM (SELECT dim2, ARRAY_AGG(DISTINCT dim1) as arr FROM foo WHERE dim1 is not null GROUP BY 1 LIMIT 5) GROUP BY 1,2",
|
||||
QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
|
||||
ImmutableList.of(
|
||||
new TopNQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.dimension(new DefaultDimensionSpec(
|
||||
"dim2",
|
||||
"d0",
|
||||
ColumnType.STRING
|
||||
))
|
||||
.metric(new DimensionTopNMetricSpec(
|
||||
null,
|
||||
StringComparators.LEXICOGRAPHIC
|
||||
))
|
||||
.filters(notNull("dim1"))
|
||||
.threshold(5)
|
||||
.aggregators(new ExpressionLambdaAggregatorFactory(
|
||||
"a0",
|
||||
ImmutableSet.of("dim1"),
|
||||
"__acc",
|
||||
"ARRAY<STRING>[]",
|
||||
"ARRAY<STRING>[]",
|
||||
true,
|
||||
true,
|
||||
false,
|
||||
"array_set_add(\"__acc\", \"dim1\")",
|
||||
"array_set_add_all(\"__acc\", \"a0\")",
|
||||
null,
|
||||
null,
|
||||
new HumanReadableBytes(1024),
|
||||
ExprMacroTable.nil()
|
||||
))
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.granularity(Granularities.ALL)
|
||||
.context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
|
||||
.postAggregators(new ExpressionPostAggregator("s0", "1", null, ExprMacroTable.nil()))
|
||||
.build()
|
||||
GroupByQuery.builder()
|
||||
.setDataSource(new TopNQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.dimension(new DefaultDimensionSpec(
|
||||
"dim2",
|
||||
"d0",
|
||||
ColumnType.STRING
|
||||
))
|
||||
.metric(new DimensionTopNMetricSpec(
|
||||
null,
|
||||
StringComparators.LEXICOGRAPHIC
|
||||
))
|
||||
.filters(notNull("dim1"))
|
||||
.threshold(5)
|
||||
.aggregators(new ExpressionLambdaAggregatorFactory(
|
||||
"a0",
|
||||
ImmutableSet.of("dim1"),
|
||||
"__acc",
|
||||
"ARRAY<STRING>[]",
|
||||
"ARRAY<STRING>[]",
|
||||
true,
|
||||
true,
|
||||
false,
|
||||
"array_set_add(\"__acc\", \"dim1\")",
|
||||
"array_set_add_all(\"__acc\", \"a0\")",
|
||||
null,
|
||||
null,
|
||||
new HumanReadableBytes(1024),
|
||||
ExprMacroTable.nil()
|
||||
))
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.granularity(Granularities.ALL)
|
||||
.context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
|
||||
.build()
|
||||
)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setDimensions(
|
||||
new DefaultDimensionSpec("d0", "_d0", ColumnType.STRING),
|
||||
new DefaultDimensionSpec("a0", "_d1", ColumnType.STRING_ARRAY)
|
||||
)
|
||||
.setAggregatorSpecs(new CountAggregatorFactory("_a0"))
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
),
|
||||
useDefault ?
|
||||
ImmutableList.of(
|
||||
|
|
|
@ -5381,6 +5381,8 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest
|
|||
@Test
|
||||
public void testPlanWithInFilterMoreThanInSubQueryThreshold()
|
||||
{
|
||||
skipVectorize();
|
||||
cannotVectorize();
|
||||
String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845, 14905, 4853, 29064)";
|
||||
|
||||
Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
|
||||
|
@ -5397,21 +5399,32 @@ public class CalciteJoinQueryTest extends BaseCalciteQueryTest
|
|||
.dataSource(
|
||||
JoinDataSource.create(
|
||||
new TableDataSource(CalciteTests.DATASOURCE3),
|
||||
InlineDataSource.fromIterable(
|
||||
ImmutableList.of(
|
||||
new Object[]{4842L},
|
||||
new Object[]{4844L},
|
||||
new Object[]{4845L},
|
||||
new Object[]{14905L},
|
||||
new Object[]{4853L},
|
||||
new Object[]{29064L}
|
||||
),
|
||||
RowSignature.builder()
|
||||
.add("ROW_VALUE", ColumnType.LONG)
|
||||
new QueryDataSource(
|
||||
GroupByQuery.builder()
|
||||
.setDataSource(InlineDataSource.fromIterable(
|
||||
ImmutableList.of(
|
||||
new Object[]{4842L},
|
||||
new Object[]{4844L},
|
||||
new Object[]{4845L},
|
||||
new Object[]{14905L},
|
||||
new Object[]{4853L},
|
||||
new Object[]{29064L}
|
||||
),
|
||||
RowSignature.builder()
|
||||
.add("ROW_VALUE", ColumnType.LONG)
|
||||
.build()
|
||||
)
|
||||
)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setDimensions(
|
||||
new DefaultDimensionSpec("ROW_VALUE", "d0", ColumnType.LONG)
|
||||
)
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setLimitSpec(NoopLimitSpec.instance())
|
||||
.build()
|
||||
),
|
||||
"j0.",
|
||||
"(\"l1\" == \"j0.ROW_VALUE\")",
|
||||
"(\"l1\" == \"j0.d0\")",
|
||||
JoinType.INNER,
|
||||
null,
|
||||
ExprMacroTable.nil(),
|
||||
|
|
|
@ -13949,15 +13949,32 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
|
|||
+ "group by 1",
|
||||
ImmutableList.of(
|
||||
GroupByQuery.builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE3)
|
||||
.setDataSource(GroupByQuery.builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE3)
|
||||
.setInterval(querySegmentSpec(Intervals.ETERNITY))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.addDimension(new DefaultDimensionSpec(
|
||||
"dim1",
|
||||
"_d0",
|
||||
ColumnType.STRING
|
||||
))
|
||||
.addAggregator(new LongSumAggregatorFactory("a0", "l1"))
|
||||
.build()
|
||||
)
|
||||
.setInterval(querySegmentSpec(Intervals.ETERNITY))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.addDimension(new DefaultDimensionSpec("dim1", "_d0", ColumnType.STRING))
|
||||
.addAggregator(new LongSumAggregatorFactory("a0", "l1"))
|
||||
.setPostAggregatorSpecs(ImmutableList.of(
|
||||
expressionPostAgg("p0", "case_searched((\"a0\" == 0),1,0)")
|
||||
.setDimensions(new DefaultDimensionSpec("_d0", "d0", ColumnType.STRING))
|
||||
.setAggregatorSpecs(aggregators(
|
||||
new FilteredAggregatorFactory(
|
||||
new CountAggregatorFactory("_a0"),
|
||||
useDefault ?
|
||||
selector("a0", "0") :
|
||||
equality("a0", 0, ColumnType.LONG)
|
||||
)
|
||||
))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
|
||||
),
|
||||
useDefault ? ImmutableList.of(
|
||||
new Object[]{"", 0L},
|
||||
|
@ -14275,4 +14292,138 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
|
|||
.run());
|
||||
assertThat(e, invalidSqlIs("ASCENDING ordering with NULLS LAST is not supported! (line [1], column [41])"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInGroupByLimitOutGroupByOrderBy()
|
||||
{
|
||||
skipVectorize();
|
||||
cannotVectorize();
|
||||
testQuery(
|
||||
"with t AS (SELECT m2, COUNT(m1) as trend_score\n"
|
||||
+ "FROM \"foo\"\n"
|
||||
+ "GROUP BY 1 \n"
|
||||
+ "LIMIT 10\n"
|
||||
+ ")\n"
|
||||
+ "select m2, (MAX(trend_score)) from t\n"
|
||||
+ "where m2 > 2\n"
|
||||
+ "GROUP BY 1 \n"
|
||||
+ "ORDER BY 2 DESC",
|
||||
QUERY_CONTEXT_DEFAULT,
|
||||
ImmutableList.of(
|
||||
new GroupByQuery.Builder()
|
||||
.setDataSource(
|
||||
new TopNQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE))
|
||||
.threshold(10)
|
||||
.aggregators(aggregators(
|
||||
useDefault
|
||||
? new CountAggregatorFactory("a0")
|
||||
: new FilteredAggregatorFactory(
|
||||
new CountAggregatorFactory("a0"),
|
||||
notNull("m1")
|
||||
)
|
||||
))
|
||||
.metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
|
||||
.context(OUTER_LIMIT_CONTEXT)
|
||||
.build()
|
||||
)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setDimensions(
|
||||
new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
|
||||
)
|
||||
.setDimFilter(
|
||||
useDefault ?
|
||||
bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) :
|
||||
new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null)
|
||||
)
|
||||
.setAggregatorSpecs(aggregators(
|
||||
new LongMaxAggregatorFactory("_a0", "a0")
|
||||
))
|
||||
.setLimitSpec(
|
||||
DefaultLimitSpec
|
||||
.builder()
|
||||
.orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC))
|
||||
.build()
|
||||
)
|
||||
.setContext(OUTER_LIMIT_CONTEXT)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{3.0D, 1L},
|
||||
new Object[]{4.0D, 1L},
|
||||
new Object[]{5.0D, 1L},
|
||||
new Object[]{6.0D, 1L}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInGroupByOrderByLimitOutGroupByOrderByLimit()
|
||||
{
|
||||
skipVectorize();
|
||||
cannotVectorize();
|
||||
testQuery(
|
||||
"with t AS (SELECT m2 as mo, COUNT(m1) as trend_score\n"
|
||||
+ "FROM \"foo\"\n"
|
||||
+ "GROUP BY 1\n"
|
||||
+ "ORDER BY trend_score DESC\n"
|
||||
+ "LIMIT 10)\n"
|
||||
+ "select mo, (MAX(trend_score)) from t\n"
|
||||
+ "where mo > 2\n"
|
||||
+ "GROUP BY 1 \n"
|
||||
+ "ORDER BY 2 DESC LIMIT 2\n",
|
||||
QUERY_CONTEXT_DEFAULT,
|
||||
ImmutableList.of(
|
||||
new GroupByQuery.Builder()
|
||||
.setDataSource(
|
||||
new TopNQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE))
|
||||
.threshold(10)
|
||||
.aggregators(aggregators(
|
||||
useDefault
|
||||
? new CountAggregatorFactory("a0")
|
||||
: new FilteredAggregatorFactory(
|
||||
new CountAggregatorFactory("a0"),
|
||||
notNull("m1")
|
||||
)
|
||||
))
|
||||
.metric(new NumericTopNMetricSpec("a0"))
|
||||
.context(OUTER_LIMIT_CONTEXT)
|
||||
.build()
|
||||
)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setDimensions(
|
||||
new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
|
||||
)
|
||||
.setDimFilter(
|
||||
useDefault ?
|
||||
bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) :
|
||||
new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null)
|
||||
)
|
||||
.setAggregatorSpecs(aggregators(
|
||||
new LongMaxAggregatorFactory("_a0", "a0")
|
||||
))
|
||||
.setLimitSpec(
|
||||
DefaultLimitSpec
|
||||
.builder()
|
||||
.orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC))
|
||||
.limit(2)
|
||||
.build()
|
||||
)
|
||||
.setContext(OUTER_LIMIT_CONTEXT)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{3.0D, 1L},
|
||||
new Object[]{4.0D, 1L}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4364,7 +4364,6 @@ public class DrillWindowQueryTest extends BaseCalciteQueryTest
|
|||
windowQueryTest();
|
||||
}
|
||||
|
||||
@NotYetSupported(Modes.CANNOT_APPLY_VIRTUAL_COL)
|
||||
@DrillTest("nestedAggs/multiWin_5")
|
||||
@Test
|
||||
public void test_nestedAggs_multiWin_5()
|
||||
|
|
|
@ -8,15 +8,15 @@ false 280487665.00748299428571428571
|
|||
false 295757331.64717262000000000000
|
||||
false 302662813.16785714333333333333
|
||||
false 304608131.82107142900000000000
|
||||
false 303537640.51502361272727272727
|
||||
false 303537640.515023651272727272727
|
||||
true 4.0000000000000000
|
||||
true 4.5000000000000000
|
||||
true 5.2222222222222222
|
||||
true 4101.1041666666666667
|
||||
true 5907.4433333333333333
|
||||
true 284524.6472222222222778
|
||||
true 449300.4527210884353810
|
||||
true 449300.4527210885000000
|
||||
true 550414.3805059523809583
|
||||
true 613525.0542768959435185
|
||||
true 613525.0542768961000000
|
||||
true 652829.5088492063491667
|
||||
true 676669.0989538239537879
|
||||
true 676669.0989538240000000
|
||||
|
|
Loading…
Reference in New Issue