mirror of https://github.com/apache/druid.git
Set explain attributes after the query is prepared (#14490)
* Add support for DML WITH AS.
* One more UT for WITH AS subquery.
* Add a test with a join query.
* Use the root query prepared node instead of individual SqlNode types.
  - Set the explain plan attributes after the query is prepared, when the query is planned and we have the finalized output names in the root source rel node.
  - Adjust tests; add a unit test for the negative ordinal case.
  - Remove the exception/error handling logic from the resolveClusteredBy function, since the validations now happen before control reaches it.
* Update comment.
This commit is contained in:
parent 5fc122a144
commit d02bb8bb6e
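Before the diff, a minimal sketch of the idea behind the change. It is illustrative only, not the committed code: it assumes 1-indexed CLUSTERED BY ordinals and an (ordinal, outputName) mapping list like the one the prepared root rel exposes, and it uses plain JDK types so it runs standalone.

import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class ClusteredByResolutionSketch
{
  // Resolve CLUSTERED BY entries against the prepared output field mappings.
  // Ordinals are looked up by position; anything else is assumed to already
  // carry its output name (identifier or alias) and passes through.
  static List<String> resolve(List<Object> clusteredByNodes, List<Map.Entry<Integer, String>> fieldMappings)
  {
    if (clusteredByNodes == null) {
      return null; // CLUSTERED BY is an optional clause
    }
    final List<String> names = new ArrayList<>();
    for (Object node : clusteredByNodes) {
      if (node instanceof Integer) {
        // Range validation happens before the prepare phase, so direct indexing is safe here.
        names.add(fieldMappings.get((Integer) node - 1).getValue());
      } else {
        names.add(node.toString());
      }
    }
    return names;
  }

  public static void main(String[] args)
  {
    final List<Map.Entry<Integer, String>> fields = List.<Map.Entry<Integer, String>>of(
        new SimpleEntry<>(1, "__time"),
        new SimpleEntry<>(2, "namespace"),
        new SimpleEntry<>(3, "country")
    );
    // CLUSTERED BY 2, 3 resolves to [namespace, country]
    System.out.println(resolve(List.<Object>of(2, 3), fields));
  }
}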
@@ -32,13 +32,11 @@ import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlNumericLiteral;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlOrderBy;
import org.apache.calcite.sql.SqlSelect;
import org.apache.calcite.sql.SqlTimestampLiteral;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.dialect.CalciteSqlDialect;
import org.apache.calcite.tools.ValidationException;
import org.apache.calcite.util.Pair;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.InvalidSqlInput;
import org.apache.druid.java.util.common.StringUtils;

@@ -327,67 +325,41 @@ public class DruidSqlParserUtils
   * </pre>
   *
   * <p>
   * The function will return the following clusteredBy columns for the above SQL: ["__time", "page_alias", "FLOOR(\"cost\")", "cityName"]
   * The function will return the following clusteredBy columns for the above SQL: ["__time", "page_alias", "FLOOR(\"cost\")", "cityName"].
   * Any ordinal and expression specified in the CLUSTERED BY clause will resolve to the final output column name.
   * </p>
   * <p>
   * This function must be called after the query is prepared, when all the validations are complete, including {@link #validateClusteredByColumns},
   * so we can safely access the arguments.
   * </p>
   * @param clusteredByNodes List of {@link SqlNode}s representing columns to be clustered by.
   * @param sourceNode The select or order by source node.
   * @param sourceFieldMappings The source field output mappings extracted from the validated root query rel node after the prepare phase.
   *
   */
  @Nullable
  public static List<String> resolveClusteredByColumnsToOutputColumns(SqlNodeList clusteredByNodes, SqlNode sourceNode)
  public static List<String> resolveClusteredByColumnsToOutputColumns(
      final SqlNodeList clusteredByNodes,
      final ImmutableList<Pair<Integer, String>> sourceFieldMappings
  )
  {
    // CLUSTERED BY is an optional clause
    if (clusteredByNodes == null) {
      return null;
    }

    Preconditions.checkArgument(
        sourceNode instanceof SqlSelect || sourceNode instanceof SqlOrderBy,
        "Source node must be either SqlSelect or SqlOrderBy, but found [%s]",
        sourceNode == null ? null : sourceNode.getKind()
    );

    final SqlSelect selectNode = (sourceNode instanceof SqlSelect) ? (SqlSelect) sourceNode
        : (SqlSelect) ((SqlOrderBy) sourceNode).query;
    final List<SqlNode> selectList = selectNode.getSelectList().getList();
    final List<String> retClusteredByNames = new ArrayList<>();

    for (SqlNode clusteredByNode : clusteredByNodes) {

      if (SqlUtil.isLiteral(clusteredByNode)) {
        // The node is a literal number -- an ordinal is specified in the CLUSTERED BY clause. Validate and look up the
        // ordinal in the select list.
        int ordinal = ((SqlNumericLiteral) clusteredByNode).getValueAs(Integer.class);
        if (ordinal < 1 || ordinal > selectList.size()) {
          throw InvalidSqlInput.exception(
              "Ordinal[%d] specified in the CLUSTERED BY clause is invalid. It must be between 1 and %d.",
              ordinal,
              selectList.size()
          );
        }
        SqlNode node = selectList.get(ordinal - 1);

        if (node instanceof SqlBasicCall) {
          retClusteredByNames.add(getColumnNameFromSqlCall(node));
        } else {
          Preconditions.checkArgument(
              node instanceof SqlIdentifier,
              "Node must be a SqlIdentifier, but found [%s]",
              node.getKind()
          );
          SqlIdentifier n = ((SqlIdentifier) node);
          retClusteredByNames.add(n.isSimple() ? n.getSimple() : n.names.get(1));
        }
      if (clusteredByNode instanceof SqlNumericLiteral) {
        // The node is a literal number -- an ordinal in the CLUSTERED BY clause. Look up the ordinal in the field mappings.
        final int ordinal = ((SqlNumericLiteral) clusteredByNode).getValueAs(Integer.class);
        retClusteredByNames.add(sourceFieldMappings.get(ordinal - 1).right);
      } else if (clusteredByNode instanceof SqlBasicCall) {
        // The node is an expression/operator.
        retClusteredByNames.add(getColumnNameFromSqlCall(clusteredByNode));
        retClusteredByNames.add(getColumnNameFromSqlCall((SqlBasicCall) clusteredByNode));
      } else {
        // The node is a simple SqlIdentifier, add the name.
        Preconditions.checkArgument(
            clusteredByNode instanceof SqlIdentifier,
            "ClusteredBy node must be a SqlIdentifier, but found [%s]",
            clusteredByNode.getKind()
        );
        // For everything else, just return the simple string representation of the node.
        retClusteredByNames.add(clusteredByNode.toString());
      }
    }

@@ -395,16 +367,12 @@ public class DruidSqlParserUtils
    return retClusteredByNames;
  }

  private static String getColumnNameFromSqlCall(final SqlNode sqlCallNode)
  private static String getColumnNameFromSqlCall(final SqlBasicCall sqlCallNode)
  {
    Preconditions.checkArgument(sqlCallNode instanceof SqlBasicCall, "Node must be a SqlBasicCall type");

    // The node may be an alias or expression, in which case we'll get the output name
    SqlBasicCall sqlBasicCall = (SqlBasicCall) sqlCallNode;
    SqlOperator operator = (sqlBasicCall).getOperator();
    if (operator instanceof SqlAsOperator) {
    if (sqlCallNode.getOperator() instanceof SqlAsOperator) {
      // Get the output name for the alias operator.
      SqlNode sqlNode = (sqlBasicCall).getOperandList().get(1);
      SqlNode sqlNode = sqlCallNode.getOperandList().get(1);
      return sqlNode.toString();
    } else {
      // Return the expression as-is.
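To make the alias path above concrete, here is a hedged, self-contained sketch. It uses the same Calcite constructors as the tests in this commit (including the SqlBasicCall array constructor from this Calcite version); the identifiers are illustrative.

import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;

public class AliasOutputNameSketch
{
  public static void main(String[] args)
  {
    // Models "DIM3 AS DIM3_ALIAS": operand 0 is the expression,
    // operand 1 is the output name the alias introduces.
    SqlBasicCall aliasCall = new SqlBasicCall(
        SqlStdOperatorTable.AS,
        new SqlNode[]{
            new SqlIdentifier("DIM3", SqlParserPos.ZERO),
            new SqlIdentifier("DIM3_ALIAS", SqlParserPos.ZERO)
        },
        SqlParserPos.ZERO
    );
    // Prints DIM3_ALIAS -- the name that CLUSTERED BY resolution keeps.
    System.out.println(aliasCall.getOperandList().get(1));
  }
}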
|
@@ -430,6 +398,18 @@ public class DruidSqlParserUtils
            clusteredByNode
        );
      }

      // Calcite already throws an "Ordinal out of range" exception for positive non-existent ordinals.
      // This negative-ordinal check is for completeness; later Calcite versions handle it themselves.
      if (clusteredByNode instanceof SqlNumericLiteral) {
        final int ordinal = ((SqlNumericLiteral) clusteredByNode).getValueAs(Integer.class);
        if (ordinal < 1) {
          throw InvalidSqlInput.exception(
              "Ordinal [%d] specified in the CLUSTERED BY clause is invalid. It must be a positive integer.",
              ordinal
          );
        }
      }
    }
  }

@@ -151,9 +151,6 @@ public class DruidPlanner implements Closeable
      handler = createHandler(root);
      handler.validate();
      plannerContext.setResourceActions(handler.resourceActions());
      if (root.getKind() == SqlKind.EXPLAIN) {
        plannerContext.setExplainAttributes(handler.explainAttributes());
      }
      state = State.VALIDATED;
    }

@@ -277,7 +277,7 @@ public abstract class IngestHandler extends QueryHandler
        DruidSqlInsert.OPERATOR.getName(),
        targetDatasource,
        ingestionGranularity,
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(sqlNode.getClusteredBy(), sqlNode.getSource()),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(sqlNode.getClusteredBy(), rootQueryRel.fields),
        null
    );
  }

@@ -351,7 +351,7 @@ public abstract class IngestHandler extends QueryHandler
        DruidSqlReplace.OPERATOR.getName(),
        targetDatasource,
        ingestionGranularity,
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(sqlNode.getClusteredBy(), sqlNode.getSource()),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(sqlNode.getClusteredBy(), rootQueryRel.fields),
        replaceIntervals
    );
  }

@@ -162,6 +162,7 @@ public abstract class QueryHandler extends SqlStatementHandler.BaseStatementHandler
    final RelDataType returnedRowType;

    if (explain != null) {
      handlerContext.plannerContext().setExplainAttributes(explainAttributes());
      returnedRowType = getExplainStructType(typeFactory);
    } else {
      returnedRowType = returnedRowType();

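Taken together with the DruidPlanner hunk above, the ordering change can be sketched as a toy lifecycle. The names here are invented for illustration and are not Druid's planner API; the point is only that explain attributes move from the validate step to the prepare step, where output names are final.

// A toy planner lifecycle illustrating the reordering (names invented).
enum State { START, VALIDATED, PREPARED }

class PlannerLifecycleSketch
{
  private State state = State.START;
  private String explainAttributes;

  void validate()
  {
    // Before this commit, explain attributes were captured here, but at this
    // point CLUSTERED BY ordinals and expressions are not yet resolved.
    state = State.VALIDATED;
  }

  void prepare(boolean isExplain, java.util.List<String> finalOutputNames)
  {
    // After prepare, the root rel's field mappings are final, so attributes
    // such as clusteredBy can report the real output column names.
    if (isExplain) {
      explainAttributes = "clusteredBy=" + finalOutputNames;
    }
    state = State.PREPARED;
  }
}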
@@ -755,6 +755,209 @@ public class CalciteInsertDmlTest extends CalciteIngestionDmlTest
    didTest = true;
  }

  @Test
  public void testExplainPlanInsertWithAsSubQueryClusteredBy()
  {
    skipVectorize();

    final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]";
    final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"foo\",\"partitionedBy\":{\"type\":\"all\"},\"clusteredBy\":[\"namespace\",\"country\"]}";

    final String sql = "EXPLAIN PLAN FOR\n"
        + "INSERT INTO \"foo\"\n"
        + "WITH dd AS (\n"
        + "SELECT * FROM TABLE(\n"
        + "  EXTERN(\n"
        + "    '{\"type\":\"inline\",\"data\":\"{\\\" \\\": 1681794225551, \\\"namespace\\\": \\\"day1\\\", \\\"country\\\": \\\"one\\\"}\\n{\\\"__time\\\": 1681794225558, \\\"namespace\\\": \\\"day2\\\", \\\"country\\\": \\\"two\\\"}\"}',\n"
        + "    '{\"type\":\"json\"}',\n"
        + "    '[{\"name\":\"__time\",\"type\":\"long\"},{\"name\":\"namespace\",\"type\":\"string\"},{\"name\":\"country\",\"type\":\"string\"}]'\n"
        + "  )\n"
        + "))\n"
        + "\n"
        + "SELECT\n"
        + "  __time,\n"
        + "  namespace,\n"
        + "  country\n"
        + "FROM dd\n"
        + "PARTITIONED BY ALL\n"
        + "CLUSTERED BY 2, 3";

    final String legacyExplanation = "DruidQueryRel("
        + "query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"external\","
        + "\"inputSource\":{\"type\":\"inline\",\"data\":\"{\\\" \\\": 1681794225551, \\\"namespace\\\": \\\"day1\\\", \\\"country\\\": \\\"one\\\"}\\n"
        + "{\\\"__time\\\": 1681794225558, \\\"namespace\\\": \\\"day2\\\", \\\"country\\\": \\\"two\\\"}\"},"
        + "\"inputFormat\":{\"type\":\"json\",\"keepNullColumns\":false,\"assumeNewlineDelimited\":false,\"useJsonNodeReader\":false},"
        + "\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"namespace\",\"type\":\"STRING\"},{\"name\":\"country\",\"type\":\"STRING\"}]},"
        + "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},"
        + "\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"namespace\",\"order\":\"ascending\"},{\"columnName\":\"country\",\"order\":\"ascending\"}],"
        + "\"columns\":[\"__time\",\"country\",\"namespace\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"{\\\"type\\\":\\\"all\\\"}\","
        + "\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}],"
        + " signature=[{__time:LONG, namespace:STRING, country:STRING}])\n";

    // Use testQuery for EXPLAIN (not testIngestionQuery).
    testQuery(
        PLANNER_CONFIG_LEGACY_QUERY_EXPLAIN,
        ImmutableMap.of("sqlQueryId", "dummy"),
        Collections.emptyList(),
        sql,
        CalciteTests.SUPER_USER_AUTH_RESULT,
        ImmutableList.of(),
        new DefaultResultsVerifier(
            ImmutableList.of(
                new Object[]{
                    legacyExplanation,
                    resources,
                    attributes
                }
            ),
            null
        ),
        null
    );

    // Test correctness of the query when only the CLUSTERED BY clause is present
    final String explanation = "[{\"query\":{\"queryType\":\"scan\"," + "\"dataSource\":{\"type\":\"external\",\"inputSource\":{\"type\":\"inline\","
        + "\"data\":\"{\\\" \\\": 1681794225551, \\\"namespace\\\": \\\"day1\\\", \\\"country\\\": \\\"one\\\"}\\n"
        + "{\\\"__time\\\": 1681794225558, \\\"namespace\\\": \\\"day2\\\", \\\"country\\\": \\\"two\\\"}\"},"
        + "\"inputFormat\":{\"type\":\"json\",\"keepNullColumns\":false,\"assumeNewlineDelimited\":false,\"useJsonNodeReader\":false},"
        + "\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"namespace\",\"type\":\"STRING\"},{\"name\":\"country\",\"type\":\"STRING\"}]},"
        + "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},"
        + "\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"namespace\",\"order\":\"ascending\"},{\"columnName\":\"country\",\"order\":\"ascending\"}],"
        + "\"columns\":[\"__time\",\"country\",\"namespace\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"{\\\"type\\\":\\\"all\\\"}\","
        + "\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},"
        + "\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"namespace\",\"type\":\"STRING\"},{\"name\":\"country\",\"type\":\"STRING\"}],"
        + "\"columnMappings\":[{\"queryColumn\":\"__time\",\"outputColumn\":\"__time\"},{\"queryColumn\":\"namespace\",\"outputColumn\":\"namespace\"},"
        + "{\"queryColumn\":\"country\",\"outputColumn\":\"country\"}]}]";

    testQuery(
        PLANNER_CONFIG_NATIVE_QUERY_EXPLAIN,
        ImmutableMap.of("sqlQueryId", "dummy"),
        Collections.emptyList(),
        sql,
        CalciteTests.SUPER_USER_AUTH_RESULT,
        ImmutableList.of(),
        new DefaultResultsVerifier(
            ImmutableList.of(
                new Object[]{
                    explanation,
                    resources,
                    attributes
                }
            ),
            null
        ),
        null
    );

    // Not using testIngestionQuery, so must set didTest manually to satisfy the check in tearDown.
    didTest = true;
  }

  @Test
  public void testExplainPlanInsertJoinQuery()
  {
    skipVectorize();

    final String resources = "[{\"name\":\"EXTERNAL\",\"type\":\"EXTERNAL\"},{\"name\":\"my_table\",\"type\":\"DATASOURCE\"}]";
    final String attributes = "{\"statementType\":\"INSERT\",\"targetDataSource\":\"my_table\",\"partitionedBy\":\"HOUR\",\"clusteredBy\":[\"__time\",\"isRobotAlias\",\"countryCapital\",\"regionName\"]}";

    final String sql = "EXPLAIN PLAN FOR\n"
        + "INSERT INTO my_table\n"
        + "WITH\n"
        + "wikidata AS (SELECT * FROM TABLE(\n"
        + "  EXTERN(\n"
        + "    '{\"type\":\"http\",\"uris\":[\"https://boo.gz\"]}',\n"
        + "    '{\"type\":\"json\"}',\n"
        + "    '[{\"name\":\"isRobot\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"},{\"name\":\"cityName\",\"type\":\"string\"},{\"name\":\"countryIsoCode\",\"type\":\"string\"},{\"name\":\"regionName\",\"type\":\"string\"}]'\n"
        + "  )\n"
        + ")),\n"
        + "countries AS (SELECT * FROM TABLE(\n"
        + "  EXTERN(\n"
        + "    '{\"type\":\"http\",\"uris\":[\"https://foo.tsv\"]}',\n"
        + "    '{\"type\":\"tsv\",\"findColumnsFromHeader\":true}',\n"
        + "    '[{\"name\":\"Country\",\"type\":\"string\"},{\"name\":\"Capital\",\"type\":\"string\"},"
        + "{\"name\":\"ISO3\",\"type\":\"string\"},{\"name\":\"ISO2\",\"type\":\"string\"}]'\n"
        + "  )\n"
        + "))\n"
        + "SELECT\n"
        + "  TIME_PARSE(\"timestamp\") AS __time,\n"
        + "  isRobot AS isRobotAlias,\n"
        + "  countries.Capital AS countryCapital,\n"
        + "  regionName\n"
        + "FROM wikidata\n"
        + "LEFT JOIN countries ON wikidata.countryIsoCode = countries.ISO2\n"
        + "PARTITIONED BY HOUR\n"
        + "CLUSTERED BY 1, 2, 3, regionName";

    final String legacyExplanation = "DruidJoinQueryRel(condition=[=($3, $6)], joinType=[left], query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},"
        + "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"v0\","
        + "\"expression\":\"timestamp_parse(\\\"timestamp\\\",null,'UTC')\",\"outputType\":\"LONG\"}],\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"v0\",\"order\":\"ascending\"},{\"columnName\":\"isRobot\",\"order\":\"ascending\"},"
        + "{\"columnName\":\"Capital\",\"order\":\"ascending\"},{\"columnName\":\"regionName\",\"order\":\"ascending\"}],\"columns\":[\"Capital\",\"isRobot\",\"regionName\",\"v0\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"HOUR\\\"\",\"sqlQueryId\":\"dummy\","
        + "\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{v0:LONG, isRobot:STRING, Capital:STRING, regionName:STRING}])\n"
        + "  DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"external\",\"inputSource\":{\"type\":\"http\",\"uris\":[\"https://boo.gz\"]},\"inputFormat\":{\"type\":\"json\",\"keepNullColumns\":false,\"assumeNewlineDelimited\":false,"
        + "\"useJsonNodeReader\":false},\"signature\":[{\"name\":\"isRobot\",\"type\":\"STRING\"},{\"name\":\"timestamp\",\"type\":\"STRING\"},{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryIsoCode\",\"type\":\"STRING\"},{\"name\":\"regionName\",\"type\":\"STRING\"}]},"
        + "\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"cityName\",\"countryIsoCode\",\"isRobot\",\"regionName\",\"timestamp\"],\"legacy\":false,"
        + "\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"HOUR\\\"\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{isRobot:STRING, timestamp:STRING, cityName:STRING, countryIsoCode:STRING, regionName:STRING}])\n"
        + "  DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"external\",\"inputSource\":{\"type\":\"http\",\"uris\":[\"https://foo.tsv\"]},\"inputFormat\":{\"type\":\"tsv\",\"delimiter\":\"\\t\",\"findColumnsFromHeader\":true},"
        + "\"signature\":[{\"name\":\"Country\",\"type\":\"STRING\"},{\"name\":\"Capital\",\"type\":\"STRING\"},{\"name\":\"ISO3\",\"type\":\"STRING\"},{\"name\":\"ISO2\",\"type\":\"STRING\"}]},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},"
        + "\"resultFormat\":\"compactedList\",\"columns\":[\"Capital\",\"ISO2\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"HOUR\\\"\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}], signature=[{Capital:STRING, ISO2:STRING}])\n";
    // Use testQuery for EXPLAIN (not testIngestionQuery).
    testQuery(
        PLANNER_CONFIG_LEGACY_QUERY_EXPLAIN,
        ImmutableMap.of("sqlQueryId", "dummy"),
        Collections.emptyList(),
        sql,
        CalciteTests.SUPER_USER_AUTH_RESULT,
        ImmutableList.of(),
        new DefaultResultsVerifier(
            ImmutableList.of(
                new Object[]{
                    legacyExplanation,
                    resources,
                    attributes
                }
            ),
            null
        ),
        null
    );

    // Test correctness of the query when only the CLUSTERED BY clause is present
    final String explanation = "[{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"join\",\"left\":{\"type\":\"external\",\"inputSource\":{\"type\":\"http\",\"uris\":[\"https://boo.gz\"]},\"inputFormat\":{\"type\":\"json\",\"keepNullColumns\":false,"
        + "\"assumeNewlineDelimited\":false,\"useJsonNodeReader\":false},\"signature\":[{\"name\":\"isRobot\",\"type\":\"STRING\"},{\"name\":\"timestamp\",\"type\":\"STRING\"},{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryIsoCode\",\"type\":\"STRING\"},"
        + "{\"name\":\"regionName\",\"type\":\"STRING\"}]},\"right\":{\"type\":\"query\",\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"external\",\"inputSource\":{\"type\":\"http\",\"uris\":[\"https://foo.tsv\"]},\"inputFormat\":{\"type\":\"tsv\",\"delimiter\":\"\\t\",\"findColumnsFromHeader\":true},"
        + "\"signature\":[{\"name\":\"Country\",\"type\":\"STRING\"},{\"name\":\"Capital\",\"type\":\"STRING\"},{\"name\":\"ISO3\",\"type\":\"STRING\"},{\"name\":\"ISO2\",\"type\":\"STRING\"}]},\"intervals\":{\"type\":\"intervals\","
        + "\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"columns\":[\"Capital\",\"ISO2\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"HOUR\\\"\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\","
        + "\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}}},\"rightPrefix\":\"j0.\",\"condition\":\"(\\\"countryIsoCode\\\" == \\\"j0.ISO2\\\")\",\"joinType\":\"LEFT\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},"
        + "\"virtualColumns\":[{\"type\":\"expression\",\"name\":\"v0\",\"expression\":\"timestamp_parse(\\\"timestamp\\\",null,'UTC')\",\"outputType\":\"LONG\"}],\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"v0\",\"order\":\"ascending\"},{\"columnName\":\"isRobot\",\"order\":\"ascending\"},"
        + "{\"columnName\":\"j0.Capital\",\"order\":\"ascending\"},{\"columnName\":\"regionName\",\"order\":\"ascending\"}],\"columns\":[\"isRobot\",\"j0.Capital\",\"regionName\",\"v0\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"HOUR\\\"\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\","
        + "\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"v0\",\"type\":\"LONG\"},{\"name\":\"isRobot\",\"type\":\"STRING\"},{\"name\":\"j0.Capital\",\"type\":\"STRING\"},{\"name\":\"regionName\",\"type\":\"STRING\"}],\"columnMappings\":[{\"queryColumn\":\"v0\",\"outputColumn\":\"__time\"},"
        + "{\"queryColumn\":\"isRobot\",\"outputColumn\":\"isRobotAlias\"},{\"queryColumn\":\"j0.Capital\",\"outputColumn\":\"countryCapital\"},{\"queryColumn\":\"regionName\",\"outputColumn\":\"regionName\"}]}]";

    testQuery(
        PLANNER_CONFIG_NATIVE_QUERY_EXPLAIN,
        ImmutableMap.of("sqlQueryId", "dummy"),
        Collections.emptyList(),
        sql,
        CalciteTests.SUPER_USER_AUTH_RESULT,
        ImmutableList.of(),
        new DefaultResultsVerifier(
            ImmutableList.of(
                new Object[]{
                    explanation,
                    resources,
                    attributes
                }
            ),
            null
        ),
        null
    );

    // Not using testIngestionQuery, so must set didTest manually to satisfy the check in tearDown.
    didTest = true;
  }

  @Test
  public void testExplainPlanInsertWithClusteredByDescThrowsException()
  {

@@ -936,6 +936,44 @@ public class CalciteReplaceDmlTest extends CalciteIngestionDmlTest
        .verify();
  }

  @Test
  public void testReplaceWithNonExistentOrdinalInClusteredBy()
  {
    skipVectorize();

    final String sql = "REPLACE INTO dst"
        + " OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' "
        + " SELECT * FROM foo"
        + " PARTITIONED BY DAY"
        + " CLUSTERED BY 1, 2, 100";

    testIngestionQuery()
        .sql(sql)
        .expectValidationError(
            invalidSqlContains("Ordinal out of range")
        )
        .verify();
  }

  @Test
  public void testReplaceWithNegativeOrdinalInClusteredBy()
  {
    skipVectorize();

    final String sql = "REPLACE INTO dst"
        + " OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' "
        + " SELECT * FROM foo"
        + " PARTITIONED BY DAY"
        + " CLUSTERED BY 1, -2, 3 DESC";

    testIngestionQuery()
        .sql(sql)
        .expectValidationError(
            invalidSqlIs("Ordinal [-2] specified in the CLUSTERED BY clause is invalid. It must be a positive integer.")
        )
        .verify();
  }

  @Test
  public void testReplaceFromExternalProjectSort()
  {

@@ -29,11 +29,10 @@ import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlOrderBy;
import org.apache.calcite.sql.SqlPostfixOperator;
import org.apache.calcite.sql.SqlSelect;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.util.Pair;
import org.apache.druid.error.DruidException;
import org.apache.druid.error.DruidExceptionMatcher;
import org.apache.druid.java.util.common.granularity.Granularities;

@@ -144,49 +143,47 @@ public class DruidSqlParserUtilsTest
  @Test
  public void testNullClusteredBy()
  {
    final SqlNodeList selectArgs = new SqlNodeList(SqlParserPos.ZERO);
    selectArgs.add(new SqlIdentifier("__time", new SqlParserPos(0, 1)));
    Assert.assertNull(DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(
        null,
        new SqlSelect(SqlParserPos.ZERO, null, selectArgs, null, null, null, null, null, null, null, null)
    )
    final ImmutableList<Pair<Integer, String>> fields = ImmutableList.of(
        Pair.of(1, "__time"),
        Pair.of(2, "foo"),
        Pair.of(3, "bar")
    );
    Assert.assertNull(
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(
            null,
            fields
        )
    );
  }

  @Test
  public void testNullSource()
  public void testSimpledClusteredByWithNullSource()
  {
    final SqlNodeList args = new SqlNodeList(SqlParserPos.ZERO);
    args.add(new SqlIdentifier("__time", SqlParserPos.ZERO));
    args.add(new SqlIntervalQualifier(TimeUnit.DAY, null, SqlParserPos.ZERO));

    IllegalArgumentException iae = Assert.assertThrows(
        IllegalArgumentException.class,
        () -> DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(args, null)
    args.add(new SqlIdentifier("FOO", new SqlParserPos(0, 2)));
    SqlBasicCall sqlBasicCall1 = new SqlBasicCall(
        new SqlAsOperator(),
        new SqlNode[]{
            new SqlIdentifier("DIM3", SqlParserPos.ZERO),
            new SqlIdentifier("DIM3_ALIAS", SqlParserPos.ZERO)
        },
        new SqlParserPos(0, 3)
    );
    args.add(sqlBasicCall1);
    Assert.assertEquals(
        Arrays.asList("__time", "FOO", "DIM3_ALIAS"),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(args, null)
    );
    Assert.assertEquals("Source node must be either SqlSelect or SqlOrderBy, but found [null]", iae.getMessage());
  }

  @Test
  public void testSimpleClusteredBy()
  {
    final SqlNodeList selectArgs = new SqlNodeList(SqlParserPos.ZERO);
    selectArgs.add(new SqlIdentifier("__time", new SqlParserPos(0, 1)));
    selectArgs.add(new SqlIdentifier("FOO", new SqlParserPos(0, 2)));
    selectArgs.add(new SqlIdentifier("BOO", new SqlParserPos(0, 3)));

    final SqlSelect sqlSelect = new SqlSelect(
        SqlParserPos.ZERO,
        null,
        selectArgs,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
    final ImmutableList<Pair<Integer, String>> sourceFieldMappings = ImmutableList.of(
        Pair.of(1, "__time"),
        Pair.of(2, "FOO"),
        Pair.of(3, "BOO")
    );

    final SqlNodeList clusteredByArgs = new SqlNodeList(SqlParserPos.ZERO);

@@ -196,42 +193,7 @@ public class DruidSqlParserUtilsTest

    Assert.assertEquals(
        Arrays.asList("__time", "FOO", "BOO"),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, sqlSelect)
    );
  }

  @Test
  public void testClusteredByOrdinalInvalidThrowsException()
  {
    final SqlNodeList selectArgs = new SqlNodeList(SqlParserPos.ZERO);
    selectArgs.add(new SqlIdentifier("__time", new SqlParserPos(0, 1)));
    selectArgs.add(new SqlIdentifier("FOO", new SqlParserPos(0, 2)));
    selectArgs.add(new SqlIdentifier("BOO", new SqlParserPos(0, 3)));

    final SqlSelect sqlSelect = new SqlSelect(
        SqlParserPos.ZERO,
        null,
        selectArgs,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
    );

    final SqlNodeList clusteredByArgs = new SqlNodeList(SqlParserPos.ZERO);
    clusteredByArgs.add(new SqlIdentifier("__time", SqlParserPos.ZERO));
    clusteredByArgs.add(new SqlIdentifier("FOO", SqlParserPos.ZERO));
    clusteredByArgs.add(SqlLiteral.createExactNumeric("4", SqlParserPos.ZERO));

    MatcherAssert.assertThat(
        Assert.assertThrows(DruidException.class, () -> DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, sqlSelect)),
        DruidExceptionMatcher.invalidSqlInput().expectMessageIs(
            "Ordinal[4] specified in the CLUSTERED BY clause is invalid. It must be between 1 and 3."
        )
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, sourceFieldMappings)
    );
  }

@@ -272,18 +234,14 @@ public class DruidSqlParserUtilsTest
    args3.add(SqlLiteral.createCharString("PT1H", SqlParserPos.ZERO));
    selectArgs.add(TimeFloorOperatorConversion.SQL_FUNCTION.createCall(args3));

    final SqlSelect sqlSelect = new SqlSelect(
        SqlParserPos.ZERO,
        null,
        selectArgs,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
    final ImmutableList<Pair<Integer, String>> sourceFieldMappings = ImmutableList.of(
        Pair.of(1, "__time"),
        Pair.of(2, "DIM3"),
        Pair.of(3, "DIM3_ALIAS"),
        Pair.of(4, "floor_dim4_time"),
        Pair.of(5, "DIM5"),
        Pair.of(5, "DIM6"),
        Pair.of(7, "TIME_FLOOR(\"timestamps\", 'PT1H')")
    );

    // Construct the clustered by args

@@ -295,45 +253,7 @@ public class DruidSqlParserUtilsTest

    Assert.assertEquals(
        Arrays.asList("DIM3_ALIAS", "floor_dim4_time", "DIM5", "TIME_FLOOR(\"timestamps\", 'PT1H')"),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, sqlSelect)
    );
  }

  @Test
  public void testSimpleClusteredByWithOrderBy()
  {
    final SqlNodeList selectArgs = new SqlNodeList(SqlParserPos.ZERO);
    selectArgs.add(new SqlIdentifier("__time", new SqlParserPos(0, 1)));
    selectArgs.add(new SqlIdentifier("FOO", new SqlParserPos(0, 2)));
    selectArgs.add(new SqlIdentifier("BOO", new SqlParserPos(0, 3)));

    final SqlSelect sqlSelect = new SqlSelect(
        SqlParserPos.ZERO,
        null,
        selectArgs,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null
    );

    SqlNodeList orderList = new SqlNodeList(SqlParserPos.ZERO);
    orderList.add(sqlSelect);

    SqlNode orderByNode = new SqlOrderBy(SqlParserPos.ZERO, sqlSelect, orderList, null, null);

    final SqlNodeList clusteredByArgs = new SqlNodeList(SqlParserPos.ZERO);
    clusteredByArgs.add(new SqlIdentifier("__time", SqlParserPos.ZERO));
    clusteredByArgs.add(new SqlIdentifier("FOO", SqlParserPos.ZERO));
    clusteredByArgs.add(SqlLiteral.createExactNumeric("3", SqlParserPos.ZERO));

    Assert.assertEquals(
        Arrays.asList("__time", "FOO", "BOO"),
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, orderByNode)
        DruidSqlParserUtils.resolveClusteredByColumnsToOutputColumns(clusteredByArgs, sourceFieldMappings)
    );
  }
}

@@ -390,6 +310,24 @@ public class DruidSqlParserUtilsTest
        .expectMessageIs("Invalid CLUSTERED BY clause [`DIM4` DESC]: cannot sort in descending order.")
        .assertThrowsAndMatches(() -> DruidSqlParserUtils.validateClusteredByColumns(clusteredByArgs));
  }

  /**
   * Tests clause "CLUSTERED BY DIM1, DIM2, 3, -10"
   */
  @Test
  public void testClusteredByColumnsWithNegativeOrdinalThrowsException()
  {
    final SqlNodeList clusteredByArgs = new SqlNodeList(SqlParserPos.ZERO);
    clusteredByArgs.add(new SqlIdentifier("DIM1", SqlParserPos.ZERO));
    clusteredByArgs.add(new SqlIdentifier("DIM2", SqlParserPos.ZERO));
    clusteredByArgs.add(new SqlIdentifier("3", SqlParserPos.ZERO));
    clusteredByArgs.add(SqlLiteral.createExactNumeric("-10", SqlParserPos.ZERO));

    DruidExceptionMatcher
        .invalidSqlInput()
        .expectMessageIs("Ordinal [-10] specified in the CLUSTERED BY clause is invalid. It must be a positive integer.")
        .assertThrowsAndMatches(() -> DruidSqlParserUtils.validateClusteredByColumns(clusteredByArgs));
  }
}

public static class FloorToGranularityConversionErrorsTest