From 1306965c9ea3c4a4a90667cdbd2b048467c2cb8d Mon Sep 17 00:00:00 2001 From: Adarsh Sanjeev Date: Wed, 27 Apr 2022 12:25:49 +0530 Subject: [PATCH] Validate select columns for insert statement (#12431) Unnamed columns in the select part of insert SQL statements currently create a table with the column name such as "EXPR$3". This PR adds a check for this. --- .../sql/calcite/planner/DruidPlanner.java | 16 ++++++ .../sql/calcite/CalciteInsertDmlTest.java | 56 ++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidPlanner.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidPlanner.java index 7b15a81eda2..b9ae4a353bd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidPlanner.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidPlanner.java @@ -99,11 +99,13 @@ import java.util.Iterator; import java.util.List; import java.util.Properties; import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; public class DruidPlanner implements Closeable { private static final EmittingLogger log = new EmittingLogger(DruidPlanner.class); + private static final Pattern UNNAMED_COLUMN_PATTERN = Pattern.compile("^EXPR\\$\\d+$", Pattern.CASE_INSENSITIVE); private final FrameworkConfig frameworkConfig; private final Planner planner; @@ -653,6 +655,7 @@ public class DruidPlanner implements Closeable { if (insert != null) { final String targetDataSource = validateAndGetDataSourceForInsert(insert); + validateColumnsForIngestion(rootQueryRel); return queryMakerFactory.buildForInsert(targetDataSource, rootQueryRel, plannerContext); } else { return queryMakerFactory.buildForSelect(rootQueryRel, plannerContext); @@ -717,6 +720,19 @@ public class DruidPlanner implements Closeable return dataSource; } + private void validateColumnsForIngestion(RelRoot rootQueryRel) throws ValidationException + { + // Check that there are no unnamed columns in the insert. + for (Pair field : rootQueryRel.fields) { + if (UNNAMED_COLUMN_PATTERN.matcher(field.right).matches()) { + throw new ValidationException("Cannot ingest expressions that do not have an alias " + + "or columns with names like EXPR$[digit]." + + "E.g. if you are ingesting \"func(X)\", then you can rewrite it as " + + "\"func(X) as myColumn\""); + } + } + } + private String buildSQLPlanningErrorMessage(Throwable exception) { String errorMessage = plannerContext.getPlanningError(); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java index bb75f9e120a..caa1ef84864 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteInsertDmlTest.java @@ -382,12 +382,12 @@ public class CalciteInsertDmlTest extends BaseCalciteQueryTest .add("__time", ColumnType.LONG) .add("floor_m1", ColumnType.FLOAT) .add("dim1", ColumnType.STRING) - .add("EXPR$3", ColumnType.DOUBLE) + .add("ceil_m2", ColumnType.DOUBLE) .build(); testInsertQuery() .sql( "INSERT INTO druid.dst " - + "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) FROM foo " + + "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) as ceil_m2 FROM foo " + "PARTITIONED BY FLOOR(__time TO DAY) CLUSTERED BY 2, dim1 DESC, CEIL(m2)" ) .expectTarget("dst", targetRowSignature) @@ -745,6 +745,53 @@ public class CalciteInsertDmlTest extends BaseCalciteQueryTest .verify(); } + @Test + public void testInsertWithUnnamedColumnInSelectStatement() + { + testInsertQuery() + .sql("INSERT INTO t SELECT dim1, dim2 || '-lol' FROM foo PARTITIONED BY ALL") + .expectValidationError( + SqlPlanningException.class, + "Cannot ingest expressions that do not have an alias " + + "or columns with names like EXPR$[digit]." + + "E.g. if you are ingesting \"func(X)\", then you can rewrite it as " + + "\"func(X) as myColumn\"" + ) + .verify(); + } + + @Test + public void testInsertWithInvalidColumnNameInIngest() + { + testInsertQuery() + .sql("INSERT INTO t SELECT __time, dim1 AS EXPR$0 FROM foo PARTITIONED BY ALL") + .expectValidationError( + SqlPlanningException.class, + "Cannot ingest expressions that do not have an alias " + + "or columns with names like EXPR$[digit]." + + "E.g. if you are ingesting \"func(X)\", then you can rewrite it as " + + "\"func(X) as myColumn\"" + ) + .verify(); + } + + @Test + public void testInsertWithUnnamedColumnInNestedSelectStatement() + { + testInsertQuery() + .sql("INSERT INTO test " + + "SELECT __time, * FROM " + + "(SELECT __time, LOWER(dim1) FROM foo) PARTITIONED BY ALL TIME") + .expectValidationError( + SqlPlanningException.class, + "Cannot ingest expressions that do not have an alias " + + "or columns with names like EXPR$[digit]." + + "E.g. if you are ingesting \"func(X)\", then you can rewrite it as " + + "\"func(X) as myColumn\"" + ) + .verify(); + } + private String externSql(final ExternalDataSource externalDataSource) { try { @@ -922,7 +969,10 @@ public class CalciteInsertDmlTest extends BaseCalciteQueryTest final Throwable e = Assert.assertThrows( Throwable.class, - () -> sqlLifecycle.validateAndAuthorize(authenticationResult) + () -> { + sqlLifecycle.validateAndAuthorize(authenticationResult); + sqlLifecycle.plan(); + } ); MatcherAssert.assertThat(e, validationErrorMatcher);