Extend the PARTITION BY clause to accept string literals for the time partitioning (#15836)

This PR contains a portion of the changes from the inactive draft PR https://github.com/apache/druid/pull/13686 by @paul-rogers, which integrates the catalog with the Calcite planner. It extends the PARTITIONED BY clause to accept string literals for the time partitioning; the example below sketches the accepted forms.
zachjsh 2024-02-09 11:45:38 -05:00 committed by GitHub
parent 6e9eee4c5f
commit f9ee2c353b
21 changed files with 426 additions and 118 deletions
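
The SQL below is a minimal sketch of the clause forms this change accepts; `dst` and `src` are hypothetical table names used only for illustration.

```SQL
-- Already supported: keyword and function forms
INSERT INTO dst SELECT * FROM src PARTITIONED BY DAY
INSERT INTO dst SELECT * FROM src PARTITIONED BY TIME_FLOOR(__time, 'P1D')

-- New with this change: string literals holding a keyword or an ISO 8601 period
INSERT INTO dst SELECT * FROM src PARTITIONED BY 'DAY'
INSERT INTO dst SELECT * FROM src PARTITIONED BY 'P1D'
```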

@@ -287,6 +287,33 @@ The following ISO 8601 periods are supported for `TIME_FLOOR` and the string constant:
 - P3M
 - P1Y
+
+The string constant can also include any of the keywords mentioned above:
+
+- `HOUR` - Same as `'PT1H'`
+- `DAY` - Same as `'P1D'`
+- `MONTH` - Same as `'P1M'`
+- `YEAR` - Same as `'P1Y'`
+- `ALL TIME`
+- `ALL` - Alias for `ALL TIME`
+
+The `WEEK` granularity is deprecated and not supported in MSQ.
+
+Examples:
+
+```SQL
+-- Keyword
+PARTITIONED BY HOUR
+-- String literal
+PARTITIONED BY 'HOUR'
+-- ISO 8601 period
+PARTITIONED BY 'PT1H'
+-- TIME_FLOOR function
+PARTITIONED BY TIME_FLOOR(__time, 'PT1H')
+```
 For more information about partitioning, see [Partitioning](concepts.md#partitioning-by-time). <br /><br />
 *Avoid partitioning by week, `P1W`, because weeks don't align neatly with months and years, making it difficult to partition by coarser granularities later.

@@ -54,6 +54,7 @@ data: {
   "org.apache.calcite.sql.SqlNodeList"
   "org.apache.calcite.sql.SqlBasicCall"
   "org.apache.druid.java.util.common.granularity.Granularity"
+  "org.apache.druid.java.util.common.granularity.GranularityType"
   "org.apache.druid.java.util.common.granularity.Granularities"
   "org.apache.druid.sql.calcite.parser.DruidSqlInsert"
   "org.apache.druid.sql.calcite.parser.DruidSqlParserUtils"

@@ -17,8 +17,7 @@
  * under the License.
  */
-// Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj
-org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity() :
+SqlGranularityLiteral PartitionGranularity() :
 {
   SqlNode e;
   Granularity granularity;

@@ -69,7 +68,7 @@ org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity
     }
   )
   {
-    return new org.apache.druid.java.util.common.Pair(granularity, unparseString);
+    return new SqlGranularityLiteral(granularity, unparseString, getPos());
   }
 }

@@ -33,7 +33,7 @@ SqlNode DruidSqlInsertEof() :
   final SqlNodeList columnList;
   final Span s;
   final Pair<SqlNodeList, SqlNodeList> p;
-  org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
+  SqlGranularityLiteral partitionedBy = null;
   SqlNodeList clusteredBy = null;
   String exportFileFormat = null;
 }

@@ -93,7 +93,7 @@ SqlNode DruidSqlInsertEof() :
     clusteredBy = ClusteredBy()
   ]
   {
-    if (clusteredBy != null && partitionedBy.lhs == null) {
+    if (clusteredBy != null && partitionedBy == null) {
       throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
         "CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
       );

@@ -112,6 +112,6 @@ SqlNode DruidSqlInsertEof() :
       return insertNode;
     }
     SqlInsert sqlInsert = (SqlInsert) insertNode;
-    return new DruidSqlInsert(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, exportFileFormat);
+    return DruidSqlInsert.create(sqlInsert, partitionedBy, clusteredBy, exportFileFormat);
   }
 }

@@ -26,8 +26,7 @@ SqlNode DruidSqlReplaceEof() :
   final Span s;
   SqlNode tableRef = null;
   SqlInsert sqlInsert;
-  // Using fully qualified name for Pair class, since Calcite also has a same class name being used in the Parser.jj
-  org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
+  SqlGranularityLiteral partitionedBy = null;
   SqlNodeList clusteredBy = null;
   final Pair<SqlNodeList, SqlNodeList> p;
   SqlNode replaceTimeQuery = null;

@@ -78,7 +77,7 @@ SqlNode DruidSqlReplaceEof() :
     clusteredBy = ClusteredBy()
   ]
   {
-    if (clusteredBy != null && partitionedBy.lhs == null) {
+    if (clusteredBy != null && partitionedBy == null) {
       throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
         "CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
       );

@@ -91,7 +90,7 @@ SqlNode DruidSqlReplaceEof() :
   <EOF>
   {
     sqlInsert = new SqlInsert(s.end(source), SqlNodeList.EMPTY, destination, source, columnList);
-    return new DruidSqlReplace(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, replaceTimeQuery, exportFileFormat);
+    return DruidSqlReplace.create(sqlInsert, partitionedBy, clusteredBy, replaceTimeQuery, exportFileFormat);
   }
 }

@@ -23,9 +23,10 @@ import org.apache.calcite.sql.SqlInsert;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlNodeList;
 import org.apache.calcite.sql.parser.SqlParserPos;
-import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.calcite.util.ImmutableNullableList;
 import javax.annotation.Nullable;
+import java.util.List;

 /**
  * Common base class to the two Druid "ingest" statements: INSERT and REPLACE.

@@ -37,10 +38,7 @@ public abstract class DruidSqlIngest extends SqlInsert
   public static final String SQL_EXPORT_FILE_FORMAT = "__exportFileFormat";

   @Nullable
-  protected final Granularity partitionedBy;
-  // Used in the unparse function to generate the original query since we convert the string to an enum
-  protected final String partitionedByStringForUnparse;
+  protected final SqlGranularityLiteral partitionedBy;

   @Nullable
   protected final SqlNodeList clusteredBy;

@@ -53,22 +51,20 @@ public abstract class DruidSqlIngest extends SqlInsert
       SqlNode targetTable,
       SqlNode source,
       SqlNodeList columnList,
-      @Nullable Granularity partitionedBy,
-      @Nullable String partitionedByStringForUnparse,
+      @Nullable SqlGranularityLiteral partitionedBy,
       @Nullable SqlNodeList clusteredBy,
      @Nullable String exportFileFormat
   )
   {
     super(pos, keywords, targetTable, source, columnList);
-    this.partitionedByStringForUnparse = partitionedByStringForUnparse;
     this.partitionedBy = partitionedBy;
     this.clusteredBy = clusteredBy;
     this.exportFileFormat = exportFileFormat;
   }

   @Nullable
-  public Granularity getPartitionedBy()
+  public SqlGranularityLiteral getPartitionedBy()
   {
     return partitionedBy;
   }

@@ -84,4 +80,14 @@ public abstract class DruidSqlIngest extends SqlInsert
   {
     return exportFileFormat;
   }
+
+  @Override
+  public List<SqlNode> getOperandList()
+  {
+    return ImmutableNullableList.<SqlNode>builder()
+        .addAll(super.getOperandList())
+        .add(partitionedBy)
+        .add(clusteredBy)
+        .build();
+  }
 }

@@ -24,7 +24,7 @@ import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlNodeList;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.SqlWriter;
-import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.calcite.sql.parser.SqlParserPos;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;

@@ -41,28 +41,49 @@ public class DruidSqlInsert extends DruidSqlIngest
   // This allows reusing super.unparse
   public static final SqlOperator OPERATOR = SqlInsert.OPERATOR;

-  /**
-   * While partitionedBy and partitionedByStringForUnparse can be null as arguments to the constructor, this is
-   * disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
-   * errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
-   * custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
-   */
-  public DruidSqlInsert(
+  public static DruidSqlInsert create(
       @Nonnull SqlInsert insertNode,
-      @Nullable Granularity partitionedBy,
-      @Nullable String partitionedByStringForUnparse,
+      @Nullable SqlGranularityLiteral partitionedBy,
       @Nullable SqlNodeList clusteredBy,
       @Nullable String exportFileFormat
   )
   {
-    super(
+    return new DruidSqlInsert(
         insertNode.getParserPosition(),
         (SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
         insertNode.getTargetTable(),
         insertNode.getSource(),
         insertNode.getTargetColumnList(),
         partitionedBy,
-        partitionedByStringForUnparse,
+        clusteredBy,
+        exportFileFormat
+    );
+  }
+
+  /**
+   * While partitionedBy can be null as arguments to the constructor, this is
+   * disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
+   * errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
+   * custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
+   */
+  public DruidSqlInsert(
+      SqlParserPos pos,
+      SqlNodeList keywords,
+      SqlNode targetTable,
+      SqlNode source,
+      SqlNodeList columnList,
+      @Nullable SqlGranularityLiteral partitionedBy,
+      @Nullable SqlNodeList clusteredBy,
+      @Nullable String exportFileFormat
+  )
+  {
+    super(
+        pos,
+        keywords,
+        targetTable,
+        source,
+        columnList,
+        partitionedBy,
         clusteredBy,
         exportFileFormat
     );

@@ -95,9 +116,9 @@ public class DruidSqlInsert extends DruidSqlIngest
     getSource().unparse(writer, 0, 0);
     writer.newlineAndIndent();

-    if (partitionedByStringForUnparse != null) {
+    if (getPartitionedBy() != null) {
       writer.keyword("PARTITIONED BY");
-      writer.keyword(partitionedByStringForUnparse);
+      getPartitionedBy().unparse(writer, leftPrec, rightPrec);
     }

     if (getClusteredBy() != null) {

@@ -19,6 +19,7 @@
 package org.apache.druid.sql.calcite.parser;

+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;

@@ -77,6 +78,16 @@ public class DruidSqlParserUtils
   private static final Logger log = new Logger(DruidSqlParserUtils.class);

   public static final String ALL = "all";
+
+  private static final List<GranularityType> DOCUMENTED_GRANULARTIES = Arrays.stream(GranularityType.values())
+      .filter(g -> g != GranularityType.WEEK)
+      .collect(Collectors.toList());
+
+  @VisibleForTesting
+  public static final String PARTITION_ERROR_MESSAGE =
+      "Invalid granularity[%s] specified after PARTITIONED BY clause. "
+      + "Expected "
+      + StringUtils.replace(StringUtils.replace(DOCUMENTED_GRANULARTIES.toString(), "[", ""), "]", ",").trim()
+      + " ALL TIME, FLOOR() or TIME_FLOOR()";

   /**
    * Delegates to {@code convertSqlNodeToGranularity} and converts the exceptions to {@link ParseException}
    * with the underlying message

@@ -96,28 +107,63 @@ public class DruidSqlParserUtils
   }

   /**
-   * This method is used to extract the granularity from a SqlNode representing following function calls:
-   * 1. FLOOR(__time TO TimeUnit)
-   * 2. TIME_FLOOR(__time, 'PT1H')
+   * This method is used to extract the granularity from a SqlNode which represents
+   * the argument to the {@code PARTITIONED BY} clause. The node can be any of the following:
+   * <ul>
+   * <li>A literal with a string that matches the SQL keywords
+   *    {@code HOUR, DAY, MONTH, YEAR, ALL [TIME]}</li>
+   * <li>A literal string with a period in ISO 8601 format.</li>
+   * <li>Function call: {@code FLOOR(__time TO TimeUnit)}</li>
+   * <li>Function call: TIME_FLOOR(__time, 'PT1H')}</li>
+   * </ul>
    * <p>
-   * Validation on the sqlNode is contingent to following conditions:
-   * 1. sqlNode is an instance of SqlCall
-   * 2. Operator is either one of TIME_FLOOR or FLOOR
-   * 3. Number of operands in the call are 2
-   * 4. First operand is a SimpleIdentifier representing __time
-   * 5. If operator is TIME_FLOOR, the second argument is a literal, and can be converted to the Granularity class
-   * 6. If operator is FLOOR, the second argument is a TimeUnit, and can be mapped using {@link TimeUnits}
+   * Validation of the function sqlNode is contingent to following conditions:
+   * <ol>
+   * <li>sqlNode is an instance of SqlCall</li>
+   * <li>Operator is either one of TIME_FLOOR or FLOOR</li>
+   * <li>Number of operands in the call are 2</li>
+   * <li>First operand is a SimpleIdentifier representing __time</li>
+   * <li>If operator is TIME_FLOOR, the second argument is a literal, and can be converted to the Granularity class</li>
+   * <li>If operator is FLOOR, the second argument is a TimeUnit, and can be mapped using {@link TimeUnits}</li>
+   * </ol>
    * <p>
-   * Since it is to be used primarily while parsing the SqlNode, it is wrapped in {@code convertSqlNodeToGranularityThrowingParseExceptions}
+   * This method is called during validation, which will catch any errors. It is then called again
+   * during conversion, at which time we assume the node is valid.
    *
    * @param sqlNode SqlNode representing a call to a function
    *
    * @return Granularity as intended by the function call
    *
-   * @throws ParseException SqlNode cannot be converted a granularity
+   * @throws DruidException if SqlNode cannot be converted to a granularity
    */
-  public static Granularity convertSqlNodeToGranularity(SqlNode sqlNode) throws ParseException
+  @Nullable
+  public static Granularity convertSqlNodeToGranularity(SqlNode sqlNode)
   {
+    if (sqlNode == null) {
+      return null;
+    }
+
+    if (sqlNode instanceof SqlLiteral) {
+      final Granularity retVal;
+      SqlLiteral literal = (SqlLiteral) sqlNode;
+      if (SqlLiteral.valueMatchesType(literal.getValue(), SqlTypeName.CHAR)) {
+        retVal = convertSqlLiteralCharToGranularity(literal);
+      } else {
+        throw makeInvalidPartitionByException(literal);
+      }
+      validateSupportedGranularityForPartitionedBy(sqlNode, retVal);
+      return retVal;
+    }
+
+    if (sqlNode instanceof SqlIdentifier) {
+      SqlIdentifier identifier = (SqlIdentifier) sqlNode;
+      final Granularity retVal;
+      retVal = convertSqlIdentiferToGranularity(identifier);
+      validateSupportedGranularityForPartitionedBy(sqlNode, retVal);
+      return retVal;
+    }
+
     if (!(sqlNode instanceof SqlCall)) {
       throw makeInvalidPartitionByException(sqlNode);
     }

@@ -166,7 +212,9 @@ public class DruidSqlParserUtils
       period = new Period(granularityString);
     }
     catch (IllegalArgumentException e) {
-      throw new ParseException(StringUtils.format("%s is an invalid period string", granularitySqlNode.toString()));
+      throw InvalidSqlInput.exception(
+          StringUtils.format("granularity[%s] is an invalid period string", granularitySqlNode.toString()),
+          sqlNode);
     }
     final PeriodGranularity retVal = new PeriodGranularity(period, null, null);
     validateSupportedGranularityForPartitionedBy(sqlNode, retVal);

@@ -200,17 +248,51 @@ public class DruidSqlParserUtils
     throw makeInvalidPartitionByException(sqlNode);
   }

+  private static Granularity convertSqlLiteralCharToGranularity(SqlLiteral literal)
+  {
+    String value = literal.getValueAs(String.class);
+    try {
+      return Granularity.fromString(value);
+    }
+    catch (IllegalArgumentException e) {
+      try {
+        return new PeriodGranularity(new Period(value), null, null);
+      }
+      catch (Exception e2) {
+        throw makeInvalidPartitionByException(literal);
+      }
+    }
+  }
+
+  private static Granularity convertSqlIdentiferToGranularity(SqlIdentifier identifier)
+  {
+    if (identifier.names.isEmpty()) {
+      throw makeInvalidPartitionByException(identifier);
+    }
+    String value = identifier.names.get(0);
+    try {
+      return Granularity.fromString(value);
+    }
+    catch (IllegalArgumentException e) {
+      try {
+        return new PeriodGranularity(new Period(value), null, null);
+      }
+      catch (Exception e2) {
+        throw makeInvalidPartitionByException(identifier);
+      }
+    }
+  }
+
   private static DruidException makeInvalidPartitionByException(SqlNode sqlNode)
   {
     return InvalidSqlInput.exception(
-        "Invalid granularity [%s] after PARTITIONED BY. "
-        + "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()",
+        PARTITION_ERROR_MESSAGE,
         sqlNode
     );
   }

   /**
-   * This method validates and converts a {@link SqlNode} representing a query into an optimized list of intervals to
+   * Validates and converts a {@link SqlNode} representing a query into an optimized list of intervals to
    * be used in creating an ingestion spec. If the sqlNode is an SqlLiteral of {@link #ALL}, returns a singleton list of
    * "ALL". Otherwise, it converts and optimizes the query using {@link MoveTimeFiltersToIntervals} into a list of
    * intervals which contain all valid values of time as per the query.

@@ -579,7 +661,10 @@ public class DruidSqlParserUtils
     return String.valueOf(zonedTimestamp.toInstant().toEpochMilli());
   }

-  public static void validateSupportedGranularityForPartitionedBy(SqlNode originalNode, Granularity granularity)
+  public static void validateSupportedGranularityForPartitionedBy(
+      @Nullable SqlNode originalNode,
+      Granularity granularity
+  )
   {
     if (!GranularityType.isStandard(granularity)) {
       throw InvalidSqlInput.exception(

@@ -27,10 +27,12 @@ import org.apache.calcite.sql.SqlNodeList;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.SqlSpecialOperator;
 import org.apache.calcite.sql.SqlWriter;
-import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.util.ImmutableNullableList;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
+import java.util.List;

 /**
  * Extends the 'replace' call to hold custom parameters specific to Druid i.e. PARTITIONED BY and the PARTITION SPECS

@@ -45,29 +47,52 @@ public class DruidSqlReplace extends DruidSqlIngest
   private final SqlNode replaceTimeQuery;

-  /**
-   * While partitionedBy and partitionedByStringForUnparse can be null as arguments to the constructor, this is
-   * disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
-   * errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
-   * custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
-   */
-  public DruidSqlReplace(
+  public static DruidSqlReplace create(
       @Nonnull SqlInsert insertNode,
-      @Nullable Granularity partitionedBy,
-      @Nullable String partitionedByStringForUnparse,
+      @Nullable SqlGranularityLiteral partitionedBy,
       @Nullable SqlNodeList clusteredBy,
       @Nullable SqlNode replaceTimeQuery,
       @Nullable String exportFileFormat
   )
   {
-    super(
+    return new DruidSqlReplace(
         insertNode.getParserPosition(),
         (SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
         insertNode.getTargetTable(),
         insertNode.getSource(),
         insertNode.getTargetColumnList(),
         partitionedBy,
-        partitionedByStringForUnparse,
+        clusteredBy,
+        replaceTimeQuery,
+        exportFileFormat
+    );
+  }
+
+  /**
+   * While partitionedBy can be null as arguments to the constructor, this is
+   * disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
+   * errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
+   * custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
+   */
+  public DruidSqlReplace(
+      SqlParserPos pos,
+      SqlNodeList keywords,
+      SqlNode targetTable,
+      SqlNode source,
+      SqlNodeList columnList,
+      @Nullable SqlGranularityLiteral partitionedBy,
+      @Nullable SqlNodeList clusteredBy,
+      @Nullable SqlNode replaceTimeQuery,
+      @Nullable String exportFileFormat
+  )
+  {
+    super(
+        pos,
+        keywords,
+        targetTable,
+        source,
+        columnList,
+        partitionedBy,
         clusteredBy,
         exportFileFormat
     );

@@ -87,6 +112,15 @@ public class DruidSqlReplace extends DruidSqlIngest
     return OPERATOR;
   }

+  @Override
+  public List<SqlNode> getOperandList()
+  {
+    return ImmutableNullableList.<SqlNode>builder()
+        .addAll(super.getOperandList())
+        .add(replaceTimeQuery)
+        .build();
+  }
+
   @Override
   public void unparse(SqlWriter writer, int leftPrec, int rightPrec)
   {

@@ -118,9 +152,9 @@ public class DruidSqlReplace extends DruidSqlIngest
     getSource().unparse(writer, 0, 0);
     writer.newlineAndIndent();

-    if (partitionedByStringForUnparse != null) {
+    if (getPartitionedBy() != null) {
       writer.keyword("PARTITIONED BY");
-      writer.keyword(partitionedByStringForUnparse);
+      getPartitionedBy().unparse(writer, 0, 0);
     }

     if (getClusteredBy() != null) {

@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.sql.calcite.parser;

import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.NlsString;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.granularity.Granularity;

import javax.annotation.Nonnull;

/**
 * Extends the {@link SqlIdentifier} to hold parameters for the PARTITIONED BY clause.
 */
public class SqlGranularityLiteral extends SqlLiteral
{
  private String unparseString;
  private Granularity granularity;

  public SqlGranularityLiteral(
      @Nonnull Granularity granularity,
      @Nonnull String unparseString,
      SqlParserPos pos)
  {
    super(new NlsString(unparseString, null, null), SqlTypeName.CHAR, pos);
    this.granularity = granularity;
    this.unparseString = unparseString;
  }

  @Override
  public SqlGranularityLiteral clone(SqlParserPos pos)
  {
    return new SqlGranularityLiteral(granularity, unparseString, pos);
  }

  @Override
  @Deprecated
  public Object clone()
  {
    throw DruidException.defensive("Function is deprecated, please use clone(SqlNode) instead.");
  }

  @Nonnull
  public Granularity getGranularity()
  {
    return granularity;
  }

  @Override
  public void unparse(SqlWriter writer, int leftPrec, int rightPrec)
  {
    if (unparseString != null) {
      writer.keyword(unparseString);
    }
  }
}

@@ -72,7 +72,7 @@ public abstract class IngestHandler extends QueryHandler
   )
   {
     super(handlerContext, queryNode, explain);
-    this.ingestionGranularity = ingestNode.getPartitionedBy();
+    ingestionGranularity = ingestNode.getPartitionedBy() != null ? ingestNode.getPartitionedBy().getGranularity() : null;
     handlerContext.hook().captureInsert(ingestNode);
   }

@@ -47,6 +47,7 @@ import org.apache.druid.sql.calcite.external.ExternalDataSource;
 import org.apache.druid.sql.calcite.external.Externals;
 import org.apache.druid.sql.calcite.filtration.Filtration;
 import org.apache.druid.sql.calcite.parser.DruidSqlInsert;
+import org.apache.druid.sql.calcite.parser.DruidSqlParserUtils;
 import org.apache.druid.sql.calcite.planner.Calcites;
 import org.apache.druid.sql.calcite.planner.PlannerContext;
 import org.apache.druid.sql.calcite.util.CalciteTests;

@@ -1016,6 +1017,47 @@ public class CalciteInsertDmlTest extends CalciteIngestionDmlTest
         .verify();
   }

+  @Test
+  public void testInsertPeriodFormGranularityWithClusteredBy()
+  {
+    // Test correctness of the query when only the CLUSTERED BY clause is present
+    RowSignature targetRowSignature = RowSignature.builder()
+        .add("__time", ColumnType.LONG)
+        .add("floor_m1", ColumnType.FLOAT)
+        .add("dim1", ColumnType.STRING)
+        .add("ceil_m2", ColumnType.DOUBLE)
+        .build();
+
+    testIngestionQuery()
+        .sql(
+            "INSERT INTO druid.dst "
+            + "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) as ceil_m2 FROM foo "
+            + "PARTITIONED BY P1D CLUSTERED BY 2, dim1, CEIL(m2)"
+        )
+        .expectTarget("dst", targetRowSignature)
+        .expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
+        .expectQuery(
+            newScanQueryBuilder()
+                .dataSource("foo")
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .columns("__time", "dim1", "v0", "v1")
+                .virtualColumns(
+                    expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT),
+                    expressionVirtualColumn("v1", "ceil(\"m2\")", ColumnType.DOUBLE)
+                )
+                .orderBy(
+                    ImmutableList.of(
+                        new ScanQuery.OrderBy("v0", ScanQuery.Order.ASCENDING),
+                        new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING),
+                        new ScanQuery.OrderBy("v1", ScanQuery.Order.ASCENDING)
+                    )
+                )
+                .context(queryContextWithGranularity(Granularities.DAY))
+                .build()
+        )
+        .expectLogicalPlanFrom("insertPartitionedByP1DWithClusteredBy")
+        .verify();
+  }
+
   @Test
   public void testInsertWithoutPartitionedByWithClusteredBy()
   {

@@ -1130,8 +1172,7 @@
       MatcherAssert.assertThat(
           e,
           invalidSqlIs(
-              "Invalid granularity ['invalid_granularity'] after PARTITIONED BY. "
-              + "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
+              StringUtils.format(DruidSqlParserUtils.PARTITION_ERROR_MESSAGE, "'invalid_granularity'")
           ));
     }
     didTest = true;

@@ -604,10 +604,8 @@ public class CalciteReplaceDmlTest extends CalciteIngestionDmlTest
       MatcherAssert.assertThat(
           e,
           invalidSqlIs(
-              "Invalid granularity ['invalid_granularity'] after PARTITIONED BY. "
-              + "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
-          )
-      );
+              StringUtils.format(DruidSqlParserUtils.PARTITION_ERROR_MESSAGE, "'invalid_granularity'")
+          ));
     }
     didTest = true;
   }
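
Both DML tests above assert the new shared error message. For context, a hedged sketch of the kind of statement they reject; `dst` and `src` are hypothetical names used only for illustration.

```SQL
-- Rejected: the literal names neither a granularity keyword nor an ISO 8601 period,
-- so planning fails with the PARTITION_ERROR_MESSAGE text asserted in these tests.
REPLACE INTO dst OVERWRITE ALL
SELECT * FROM src
PARTITIONED BY 'invalid_granularity'
```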

@@ -21,6 +21,7 @@ package org.apache.druid.sql.calcite.parser;
 import com.google.common.collect.ImmutableList;
 import org.apache.calcite.avatica.util.TimeUnit;
+import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.sql.SqlAsOperator;
 import org.apache.calcite.sql.SqlBasicCall;
 import org.apache.calcite.sql.SqlIdentifier;

@@ -39,12 +40,14 @@ import org.apache.druid.error.DruidExceptionMatcher;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.granularity.Granularity;
+import org.apache.druid.sql.calcite.expression.TimeUnits;
 import org.apache.druid.sql.calcite.expression.builtin.TimeFloorOperatorConversion;
 import org.apache.druid.sql.calcite.planner.Calcites;
 import org.apache.druid.sql.calcite.planner.DruidTypeSystem;
 import org.hamcrest.MatcherAssert;
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
+import org.joda.time.Period;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.experimental.runners.Enclosed;

@@ -102,23 +105,25 @@ public class DruidSqlParserUtilsTest
     public static Iterable<Object[]> constructorFeeder()
     {
       return ImmutableList.of(
-          new Object[]{TimeUnit.SECOND, Granularities.SECOND},
-          new Object[]{TimeUnit.MINUTE, Granularities.MINUTE},
-          new Object[]{TimeUnit.HOUR, Granularities.HOUR},
-          new Object[]{TimeUnit.DAY, Granularities.DAY},
-          new Object[]{TimeUnit.WEEK, Granularities.WEEK},
-          new Object[]{TimeUnit.MONTH, Granularities.MONTH},
-          new Object[]{TimeUnit.QUARTER, Granularities.QUARTER},
-          new Object[]{TimeUnit.YEAR, Granularities.YEAR}
+          new Object[]{TimeUnit.SECOND, TimeUnits.toPeriod(TimeUnitRange.SECOND), Granularities.SECOND},
+          new Object[]{TimeUnit.MINUTE, TimeUnits.toPeriod(TimeUnitRange.MINUTE), Granularities.MINUTE},
+          new Object[]{TimeUnit.HOUR, TimeUnits.toPeriod(TimeUnitRange.HOUR), Granularities.HOUR},
+          new Object[]{TimeUnit.DAY, TimeUnits.toPeriod(TimeUnitRange.DAY), Granularities.DAY},
+          new Object[]{TimeUnit.WEEK, TimeUnits.toPeriod(TimeUnitRange.WEEK), Granularities.WEEK},
+          new Object[]{TimeUnit.MONTH, TimeUnits.toPeriod(TimeUnitRange.MONTH), Granularities.MONTH},
+          new Object[]{TimeUnit.QUARTER, TimeUnits.toPeriod(TimeUnitRange.QUARTER), Granularities.QUARTER},
+          new Object[]{TimeUnit.YEAR, TimeUnits.toPeriod(TimeUnitRange.YEAR), Granularities.YEAR}
       );
     }

     TimeUnit timeUnit;
+    Period period;
     Granularity expectedGranularity;

-    public FloorToGranularityConversionTest(TimeUnit timeUnit, Granularity expectedGranularity)
+    public FloorToGranularityConversionTest(TimeUnit timeUnit, Period period, Granularity expectedGranularity)
     {
       this.timeUnit = timeUnit;
+      this.period = period;
       this.expectedGranularity = expectedGranularity;
     }

@@ -133,6 +138,39 @@ public class DruidSqlParserUtilsTest
       Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(floorCall);
       Assert.assertEquals(expectedGranularity, actualGranularity);
     }
+
+    /**
+     * Tests clause like "PARTITIONED BY 'day'"
+     */
+    @Test
+    public void testConvertSqlNodeToGranularityAsLiteral() throws ParseException
+    {
+      SqlNode sqlNode = SqlLiteral.createCharString(timeUnit.name(), SqlParserPos.ZERO);
+      Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
+      Assert.assertEquals(expectedGranularity, actualGranularity);
+    }
+
+    /**
+     * Tests clause like "PARTITIONED BY PT1D"
+     */
+    @Test
+    public void testConvertSqlNodeToPeriodFormGranularityAsIdentifier() throws ParseException
+    {
+      SqlNode sqlNode = new SqlIdentifier(period.toString(), SqlParserPos.ZERO);
+      Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
+      Assert.assertEquals(expectedGranularity, actualGranularity);
+    }
+
+    /**
+     * Tests clause like "PARTITIONED BY 'PT1D'"
+     */
+    @Test
+    public void testConvertSqlNodeToPeriodFormGranularityAsLiteral() throws ParseException
+    {
+      SqlNode sqlNode = SqlLiteral.createCharString(period.toString(), SqlParserPos.ZERO);
+      Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
+      Assert.assertEquals(expectedGranularity, actualGranularity);
+    }
   }

   /**

@@ -305,28 +343,6 @@ public class DruidSqlParserUtilsTest
   public static class FloorToGranularityConversionErrorsTest
   {
-    /**
-     * Tests clause like "PARTITIONED BY 'day'"
-     */
-    @Test
-    public void testConvertSqlNodeToGranularityWithIncorrectNode()
-    {
-      SqlNode sqlNode = SqlLiteral.createCharString("day", SqlParserPos.ZERO);
-      DruidException e = Assert.assertThrows(
-          DruidException.class,
-          () -> DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode)
-      );
-      MatcherAssert.assertThat(
-          e,
-          DruidExceptionMatcher
-              .invalidSqlInput()
-              .expectMessageIs(
-                  "Invalid granularity ['day'] after PARTITIONED BY. "
-                  + "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
-              )
-      );
-    }
     /**
      * Tests clause like "PARTITIONED BY CEIL(__time TO DAY)"
      */

@@ -424,11 +440,11 @@ public class DruidSqlParserUtilsTest
       args.add(new SqlIdentifier("__time", SqlParserPos.ZERO));
       args.add(SqlLiteral.createCharString("abc", SqlParserPos.ZERO));
       final SqlNode sqlNode = TimeFloorOperatorConversion.SQL_FUNCTION.createCall(args);
-      ParseException e = Assert.assertThrows(
-          ParseException.class,
+      DruidException e = Assert.assertThrows(
+          DruidException.class,
           () -> DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode)
       );
-      Assert.assertEquals("'abc' is an invalid period string", e.getMessage());
+      Assert.assertEquals("granularity['abc'] is an invalid period string", e.getMessage());
     }
   }

@@ -1,3 +1,3 @@
-LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
+LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
   LogicalProject(inputs=[0..2])
     ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"CalciteIngestionDmlTest$TestFileInputSource","files":["/tmp/foo.csv"]},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])

@@ -1,3 +1,3 @@
-LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
+LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
   LogicalProject(inputs=[0..2])
     ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"http","uris":["http://foo.com/bar.csv"],"httpAuthenticationUsername":"bob","httpAuthenticationPassword":{"type":"default","password":"secret"}},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])

@@ -1,3 +1,3 @@
-LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
+LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
   LogicalProject(inputs=[0..2])
     ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"inline","data":"a,b,1\nc,d,2\n"},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])

@@ -0,0 +1,4 @@
LogicalInsert(target=[druid.dst], partitionedBy=[P1D], clusteredBy=[2, `dim1`, CEIL(`m2`)])
  LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC])
    LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1, CEIL($6)]])
      LogicalTableScan(table=[[druid, foo]])

@@ -1,4 +1,4 @@
-LogicalInsert(target=[druid.dst], partitionedBy=[{type=period, period=P1D, timeZone=UTC, origin=null}], clusteredBy=[2, `dim1`, CEIL(`m2`)])
+LogicalInsert(target=[druid.dst], partitionedBy=[FLOOR(`__TIME` TO DAY)], clusteredBy=[2, `dim1`, CEIL(`m2`)])
   LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC])
     LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1, CEIL($6)]])
       LogicalTableScan(table=[[druid, foo]])

@@ -1,3 +1,3 @@
-LogicalInsert(target=[druid.dst], partitionedBy=[{type=period, period=PT1H, timeZone=UTC, origin=null}], clusteredBy=[<none>])
+LogicalInsert(target=[druid.dst], partitionedBy=[`TIME_FLOOR`(`__TIME`, 'PT1H')], clusteredBy=[<none>])
   LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1]])
     LogicalTableScan(table=[[druid, foo]])

@@ -1,3 +1,3 @@
-LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
+LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
   LogicalProject(inputs=[0..2])
     ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"local","files":["/tmp/foo.csv","/tmp/bar.csv"]},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])