Handling planning with alias for time for group by and order by (#12418)

An outer scan query that requires ordering on a column should be considered an invalid query.
This commit is contained in:
somu-imply 2022-04-14 21:59:17 -07:00 committed by GitHub
parent a72cc28959
commit cd6fba2f6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 148 additions and 12 deletions

View File

@ -32,8 +32,8 @@ import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignatures;
@ -46,7 +46,9 @@ import java.util.Set;
*/
public class DruidOuterQueryRel extends DruidRel<DruidOuterQueryRel>
{
private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__subquery__");
private static final QueryDataSource DUMMY_DATA_SOURCE = new QueryDataSource(
Druids.newScanQueryBuilder().dataSource("__subquery__").eternityInterval().build()
);
private final PartialDruidQuery partialQuery;
private RelNode sourceRel;

View File

@ -1199,14 +1199,25 @@ public class DruidQuery
orderByColumns = Collections.emptyList();
}
if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME)
&& (orderByColumns.size() > 1
|| orderByColumns.stream()
.anyMatch(orderBy -> !orderBy.getColumnName().equals(ColumnHolder.TIME_COLUMN_NAME)))) {
// Cannot handle this ordering.
// Scan cannot ORDER BY non-time columns.
plannerContext.setPlanningError("SQL query requires order by non-time column %s that is not supported.", orderByColumns);
return null;
// When the SCAN_CAN_ORDER_BY_NON_TIME feature is unavailable, any requested ordering is validated here and
// planning is abandoned (planning error set, null returned) if the ordering cannot be honored.
if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME) && !orderByColumns.isEmpty()) {
if (orderByColumns.size() > 1 || !ColumnHolder.TIME_COLUMN_NAME.equals(orderByColumns.get(0).getColumnName())) {
// Cannot handle this ordering.
// Scan cannot ORDER BY non-time columns.
plannerContext.setPlanningError(
"SQL query requires order by non-time column %s that is not supported.",
orderByColumns
);
return null;
}
if (!dataSource.isConcrete()) {
// Reaching this point means the single ORDER BY column is the time column (the check above has passed),
// but the datasource is not concrete, so the ordering is rejected as well.
// NOTE(review): the message below still says "non-time column" although this branch is only reached when
// ordering by the time column -- consider rewording it.
plannerContext.setPlanningError(
"SQL query requires order by non-time column on a datasource[%s], which is not supported.",
dataSource
);
return null;
}
}
// Compute the list of columns to select, sorted and deduped.

View File

@ -104,6 +104,64 @@ import static org.apache.druid.query.QueryContexts.JOIN_FILTER_REWRITE_ENABLE_KE
@RunWith(JUnitParamsRunner.class)
public class CalciteJoinQueryTest extends BaseCalciteQueryTest
{
/**
 * Joins two subqueries that each group by and order by an alias of {@code __time}, and expects the plan to be
 * a scan over an inner join of two groupBy queries (neither of which carries a limit spec).
 */
@Test
public void testInnerJoinWithLimitAndAlias() throws Exception
{
  minTopNThreshold = 1;

  // Shared by the SQL call and by every expected native query; approximate topN is switched off.
  final Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
  queryContext.put(PlannerConfig.CTX_KEY_USE_APPROXIMATE_TOPN, false);

  final String sql =
      "select t1.b1 from (select __time as b1 from numfoo group by 1 order by 1) as t1 inner join (\n"
      + " select __time as b2 from foo group by 1 order by 1\n"
      + ") as t2 on t1.b1 = t2.b2 ";

  // Left side of the join: __time grouped from "numfoo", exposed as "_d0".
  final QueryDataSource leftSubquery = new QueryDataSource(
      GroupByQuery.builder()
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDataSource(new TableDataSource("numfoo"))
                  .setDimensions(new DefaultDimensionSpec("__time", "_d0", ColumnType.LONG))
                  .setContext(queryContext)
                  .build()
  );

  // Right side of the join: __time grouped from "foo", exposed as "d0" (prefixed with "j0." by the join).
  final QueryDataSource rightSubquery = new QueryDataSource(
      GroupByQuery.builder()
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDataSource(new TableDataSource("foo"))
                  .setDimensions(new DefaultDimensionSpec("__time", "d0", ColumnType.LONG))
                  .setContext(queryContext)
                  .build()
  );

  final JoinDataSource joinedSubqueries = JoinDataSource.create(
      leftSubquery,
      rightSubquery,
      "j0.",
      "(\"_d0\" == \"j0.d0\")",
      JoinType.INNER,
      null,
      ExprMacroTable.nil()
  );

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{946684800000L},
      new Object[]{946771200000L},
      new Object[]{946857600000L},
      new Object[]{978307200000L},
      new Object[]{978393600000L},
      new Object[]{978480000000L}
  );

  testQuery(
      sql,
      queryContext,
      ImmutableList.of(
          newScanQueryBuilder()
              .intervals(querySegmentSpec(Filtration.eternity()))
              .dataSource(joinedSubqueries)
              .columns("_d0")
              .context(queryContext)
              .build()
      ),
      expectedRows
  );
}
@Test
public void testExactTopNOnInnerJoinWithLimit() throws Exception
{

View File

@ -6719,6 +6719,12 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
)
)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setLimitSpec(
new DefaultLimitSpec(
ImmutableList.of(),
1
)
)
.setGranularity(Granularities.ALL)
.setAggregatorSpecs(
useDefault
@ -6752,7 +6758,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
new FieldAccessPostAggregator(null, "_a2:count")
)
),
expressionPostAgg("p0", "timestamp_extract(\"_a3\",'EPOCH','UTC')")
expressionPostAgg("s0", "timestamp_extract(\"_a3\",'EPOCH','UTC')")
)
)
.setContext(QUERY_CONTEXT_DEFAULT)
@ -7002,7 +7008,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
+ " )\n"
+ ")";
final String legacyExplanation =
"DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{a0:LONG}])\n"
"DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"query\",\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"list\",\"batchSize\":20480,\"filter\":null,\"context\":null,\"descending\":false,\"granularity\":{\"type\":\"all\"}}},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{a0:LONG}])\n"
+ " DruidJoinQueryRel(condition=[=(SUBSTRING($3, 1, 1), $8)], joinType=[inner], query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n"
+ " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX<hyperUnique>}])\n"
+ " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n";
@ -11100,6 +11106,65 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
/**
 * Groups by an alias of {@code __time} together with {@code dim2}, orders by the alias with LIMIT 1, and
 * expects a single groupBy query whose limit spec sorts on {@code d0} ascending with numeric comparison.
 */
@Test
public void testOrderByAlongWithAliasOrderByTimeGroupByMulti() throws Exception
{
  // ORDER BY on the aliased time column becomes an ordering on output dimension "d0" with limit 1.
  final DefaultLimitSpec orderByTimeLimitOne = new DefaultLimitSpec(
      Collections.singletonList(
          new OrderByColumnSpec("d0", Direction.ASCENDING, StringComparators.NUMERIC)
      ),
      1
  );

  final GroupByQuery expectedQuery =
      GroupByQuery.builder()
                  .setDataSource(CalciteTests.DATASOURCE1)
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(
                      dimensions(
                          new DefaultDimensionSpec("__time", "d0", ColumnType.LONG),
                          new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING)
                      )
                  )
                  .setLimitSpec(orderByTimeLimitOne)
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{946684800000L, "a"}
  );

  testQuery(
      "select __time as bug, dim2 from druid.foo group by 1, 2 order by 1 limit 1",
      ImmutableList.of(expectedQuery),
      expectedRows
  );
}
/**
 * Groups and orders by an alias of {@code __time} alone with LIMIT 1, and expects a topN query on the
 * {@code __time} dimension with threshold 1 and a numeric dimension-ordering metric.
 */
@Test
public void testOrderByAlongWithAliasOrderByTimeGroupByOneCol() throws Exception
{
  // Kept as a builder so the expected query is only materialised when the expectation list is assembled.
  final TopNQueryBuilder expectedTopN =
      new TopNQueryBuilder()
          .dataSource(CalciteTests.DATASOURCE1)
          .intervals(querySegmentSpec(Filtration.eternity()))
          .granularity(Granularities.ALL)
          .dimension(
              new DefaultDimensionSpec("__time", "d0", ColumnType.LONG)
          )
          .threshold(1)
          .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
          .context(QUERY_CONTEXT_DEFAULT);

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{946684800000L}
  );

  testQuery(
      "select __time as bug from druid.foo group by 1 order by 1 limit 1",
      ImmutableList.of(expectedTopN.build()),
      expectedRows
  );
}
@Test
public void testProjectAfterSort() throws Exception
{