Extend the PARTITIONED BY clause to accept string literals for time partitioning (#15836)

This PR contains a portion of the changes from the inactive draft PR for integrating the catalog with the Calcite planner (https://github.com/apache/druid/pull/13686, by @paul-rogers). It extends the PARTITIONED BY clause to accept string literals for time partitioning.
zachjsh 2024-02-09 11:45:38 -05:00 committed by GitHub
parent 6e9eee4c5f
commit f9ee2c353b
21 changed files with 426 additions and 118 deletions


@@ -287,6 +287,33 @@ The following ISO 8601 periods are supported for `TIME_FLOOR` and the string con
- P3M
- P1Y
The string constant can also include any of the keywords mentioned above:
- `HOUR` - Same as `'PT1H'`
- `DAY` - Same as `'P1D'`
- `MONTH` - Same as `'P1M'`
- `YEAR` - Same as `'P1Y'`
- `ALL TIME`
- `ALL` - Alias for `ALL TIME`
The `WEEK` granularity is deprecated and not supported in MSQ.
Examples:
```SQL
-- Keyword
PARTITIONED BY HOUR
-- String literal
PARTITIONED BY 'HOUR'
-- ISO 8601 period
PARTITIONED BY 'PT1H'
-- TIME_FLOOR function
PARTITIONED BY TIME_FLOOR(__time, 'PT1H')
```
For more information about partitioning, see [Partitioning](concepts.md#partitioning-by-time).

Avoid partitioning by week (`P1W`), because weeks don't align neatly with months and years, making it difficult to partition by coarser granularities later.


@@ -54,6 +54,7 @@ data: {
"org.apache.calcite.sql.SqlNodeList"
"org.apache.calcite.sql.SqlBasicCall"
"org.apache.druid.java.util.common.granularity.Granularity"
"org.apache.druid.java.util.common.granularity.GranularityType"
"org.apache.druid.java.util.common.granularity.Granularities"
"org.apache.druid.sql.calcite.parser.DruidSqlInsert"
"org.apache.druid.sql.calcite.parser.DruidSqlParserUtils"


@@ -17,8 +17,7 @@
* under the License.
*/
// Using the fully qualified name for the Pair class, since Calcite also has a class with the same name in Parser.jj
org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity() :
SqlGranularityLiteral PartitionGranularity() :
{
SqlNode e;
Granularity granularity;
@@ -52,14 +51,14 @@ org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity
|
<ALL>
{
granularity = Granularities.ALL;
unparseString = "ALL";
}
[
<TIME>
{
unparseString += " TIME";
}
]
|
e = Expression(ExprContext.ACCEPT_SUB_QUERY)
@@ -69,7 +68,7 @@ org.apache.druid.java.util.common.Pair<Granularity, String> PartitionGranularity
}
)
{
return new org.apache.druid.java.util.common.Pair(granularity, unparseString);
return new SqlGranularityLiteral(granularity, unparseString, getPos());
}
}
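The substantive change in this production: `PartitionGranularity()` now returns a single `SqlGranularityLiteral` node rather than a raw `Pair<Granularity, String>`. A minimal sketch of what the `<ALL> [ <TIME> ]` branch builds (class name is illustrative; `SqlParserPos.ZERO` stands in for the parser's `getPos()`):

```java
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.sql.calcite.parser.SqlGranularityLiteral;

public class PartitionGranularitySketch
{
  public static void main(String[] args)
  {
    // One SqlNode now carries both the resolved granularity (consumed by the
    // planner) and the original text (written back out by unparse()),
    // replacing the old two-element Pair.
    SqlGranularityLiteral allTime = new SqlGranularityLiteral(
        Granularities.ALL,  // planner-facing granularity
        "ALL TIME",         // unparse text
        SqlParserPos.ZERO   // stand-in for getPos() inside the parser
    );
    System.out.println(allTime.getGranularity()); // AllGranularity
  }
}
```

Because the result is a true `SqlNode`, it can participate in `getOperandList()` in `DruidSqlIngest` below.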


@@ -33,7 +33,7 @@ SqlNode DruidSqlInsertEof() :
final SqlNodeList columnList;
final Span s;
final Pair<SqlNodeList, SqlNodeList> p;
org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
SqlGranularityLiteral partitionedBy = null;
SqlNodeList clusteredBy = null;
String exportFileFormat = null;
}
@@ -93,7 +93,7 @@ SqlNode DruidSqlInsertEof() :
clusteredBy = ClusteredBy()
]
{
if (clusteredBy != null && partitionedBy.lhs == null) {
if (clusteredBy != null && partitionedBy == null) {
throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
"CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
);
@@ -112,6 +112,6 @@ SqlNode DruidSqlInsertEof() :
return insertNode;
}
SqlInsert sqlInsert = (SqlInsert) insertNode;
return new DruidSqlInsert(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, exportFileFormat);
return DruidSqlInsert.create(sqlInsert, partitionedBy, clusteredBy, exportFileFormat);
}
}


@@ -26,8 +26,7 @@ SqlNode DruidSqlReplaceEof() :
final Span s;
SqlNode tableRef = null;
SqlInsert sqlInsert;
// Using the fully qualified name for the Pair class, since Calcite also has a class with the same name in Parser.jj
org.apache.druid.java.util.common.Pair<Granularity, String> partitionedBy = new org.apache.druid.java.util.common.Pair(null, null);
SqlGranularityLiteral partitionedBy = null;
SqlNodeList clusteredBy = null;
final Pair<SqlNodeList, SqlNodeList> p;
SqlNode replaceTimeQuery = null;
@@ -78,7 +77,7 @@ SqlNode DruidSqlReplaceEof() :
clusteredBy = ClusteredBy()
]
{
if (clusteredBy != null && partitionedBy.lhs == null) {
if (clusteredBy != null && partitionedBy == null) {
throw org.apache.druid.sql.calcite.parser.DruidSqlParserUtils.problemParsing(
"CLUSTERED BY found before PARTITIONED BY, CLUSTERED BY must come after the PARTITIONED BY clause"
);
@@ -91,7 +90,7 @@ SqlNode DruidSqlReplaceEof() :
<EOF>
{
sqlInsert = new SqlInsert(s.end(source), SqlNodeList.EMPTY, destination, source, columnList);
return new DruidSqlReplace(sqlInsert, partitionedBy.lhs, partitionedBy.rhs, clusteredBy, replaceTimeQuery, exportFileFormat);
return DruidSqlReplace.create(sqlInsert, partitionedBy, clusteredBy, replaceTimeQuery, exportFileFormat);
}
}


@@ -23,9 +23,10 @@ import org.apache.calcite.sql.SqlInsert;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.calcite.util.ImmutableNullableList;
import javax.annotation.Nullable;
import java.util.List;
/**
* Common base class to the two Druid "ingest" statements: INSERT and REPLACE.
@@ -37,10 +38,7 @@ public abstract class DruidSqlIngest extends SqlInsert
public static final String SQL_EXPORT_FILE_FORMAT = "__exportFileFormat";
@Nullable
protected final Granularity partitionedBy;
// Used in the unparse function to generate the original query since we convert the string to an enum
protected final String partitionedByStringForUnparse;
protected final SqlGranularityLiteral partitionedBy;
@Nullable
protected final SqlNodeList clusteredBy;
@@ -53,22 +51,20 @@ public abstract class DruidSqlIngest extends SqlInsert
SqlNode targetTable,
SqlNode source,
SqlNodeList columnList,
@Nullable Granularity partitionedBy,
@Nullable String partitionedByStringForUnparse,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
super(pos, keywords, targetTable, source, columnList);
this.partitionedByStringForUnparse = partitionedByStringForUnparse;
this.partitionedBy = partitionedBy;
this.clusteredBy = clusteredBy;
this.exportFileFormat = exportFileFormat;
}
@Nullable
public Granularity getPartitionedBy()
public SqlGranularityLiteral getPartitionedBy()
{
return partitionedBy;
}
@@ -84,4 +80,14 @@ public abstract class DruidSqlIngest extends SqlInsert
{
return exportFileFormat;
}
@Override
public List<SqlNode> getOperandList()
{
return ImmutableNullableList.<SqlNode>builder()
.addAll(super.getOperandList())
.add(partitionedBy)
.add(clusteredBy)
.build();
}
}
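Exposing `partitionedBy` and `clusteredBy` through `getOperandList()` lets Calcite walk the Druid-specific clauses as ordinary operands. `ImmutableNullableList` is used because either clause may legitimately be absent; a small sketch of that property (class name is illustrative):

```java
import java.util.List;
import org.apache.calcite.util.ImmutableNullableList;

public class NullableOperandsSketch
{
  public static void main(String[] args)
  {
    // Unlike Guava's ImmutableList, Calcite's ImmutableNullableList accepts
    // null elements, so a missing CLUSTERED BY clause can sit in the operand
    // list without special-casing.
    List<String> operands = ImmutableNullableList.<String>builder()
        .add("PARTITIONED BY DAY")
        .add(null) // absent CLUSTERED BY
        .build();
    System.out.println(operands); // [PARTITIONED BY DAY, null]
  }
}
```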


@@ -24,7 +24,7 @@ import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlWriter;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.calcite.sql.parser.SqlParserPos;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@@ -41,28 +41,49 @@ public class DruidSqlInsert extends DruidSqlIngest
// This allows reusing super.unparse
public static final SqlOperator OPERATOR = SqlInsert.OPERATOR;
/**
* While partitionedBy and partitionedByStringForUnparse can be null as arguments to the constructor, this is
* disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
* errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
* custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
*/
public DruidSqlInsert(
public static DruidSqlInsert create(
@Nonnull SqlInsert insertNode,
@Nullable Granularity partitionedBy,
@Nullable String partitionedByStringForUnparse,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
super(
return new DruidSqlInsert(
insertNode.getParserPosition(),
(SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
insertNode.getTargetTable(),
insertNode.getSource(),
insertNode.getTargetColumnList(),
partitionedBy,
partitionedByStringForUnparse,
clusteredBy,
exportFileFormat
);
}
/**
* While partitionedBy can be null as an argument to the constructor, this is
* disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
* errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
* custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
*/
public DruidSqlInsert(
SqlParserPos pos,
SqlNodeList keywords,
SqlNode targetTable,
SqlNode source,
SqlNodeList columnList,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable String exportFileFormat
)
{
super(
pos,
keywords,
targetTable,
source,
columnList,
partitionedBy,
clusteredBy,
exportFileFormat
);
@@ -95,9 +116,9 @@ public class DruidSqlInsert extends DruidSqlIngest
getSource().unparse(writer, 0, 0);
writer.newlineAndIndent();
if (partitionedByStringForUnparse != null) {
if (getPartitionedBy() != null) {
writer.keyword("PARTITIONED BY");
writer.keyword(partitionedByStringForUnparse);
getPartitionedBy().unparse(writer, leftPrec, rightPrec);
}
if (getClusteredBy() != null) {


@@ -19,6 +19,7 @@
package org.apache.druid.sql.calcite.parser;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@@ -77,6 +78,16 @@ public class DruidSqlParserUtils
private static final Logger log = new Logger(DruidSqlParserUtils.class);
public static final String ALL = "all";
private static final List<GranularityType> DOCUMENTED_GRANULARTIES = Arrays.stream(GranularityType.values())
.filter(g -> g != GranularityType.WEEK)
.collect(Collectors.toList());
@VisibleForTesting
public static final String PARTITION_ERROR_MESSAGE =
"Invalid granularity[%s] specified after PARTITIONED BY clause. "
+ "Expected "
+ StringUtils.replace(StringUtils.replace(DOCUMENTED_GRANULARTIES.toString(), "[", ""), "]", ",").trim()
+ " ALL TIME, FLOOR() or TIME_FLOOR()";
/**
* Delegates to {@code convertSqlNodeToGranularity} and converts the exceptions to {@link ParseException}
* with the underlying message
@@ -96,28 +107,63 @@ public class DruidSqlParserUtils
}
/**
* This method is used to extract the granularity from a SqlNode representing following function calls:
* 1. FLOOR(__time TO TimeUnit)
* 2. TIME_FLOOR(__time, 'PT1H')
* This method is used to extract the granularity from a SqlNode which represents
* the argument to the {@code PARTITIONED BY} clause. The node can be any of the following:
* <ul>
* <li>A literal with a string that matches the SQL keywords
* {@code HOUR, DAY, MONTH, YEAR, ALL [TIME]}</li>
* <li>A literal string with a period in ISO 8601 format.</li>
* <li>Function call: {@code FLOOR(__time TO TimeUnit)}</li>
* <li>Function call: {@code TIME_FLOOR(__time, 'PT1H')}</li>
* </ul>
* <p>
* Validation on the sqlNode is contingent to following conditions:
* 1. sqlNode is an instance of SqlCall
* 2. Operator is either one of TIME_FLOOR or FLOOR
* 3. Number of operands in the call are 2
* 4. First operand is a SimpleIdentifier representing __time
* 5. If operator is TIME_FLOOR, the second argument is a literal, and can be converted to the Granularity class
* 6. If operator is FLOOR, the second argument is a TimeUnit, and can be mapped using {@link TimeUnits}
* Validation of the function sqlNode is contingent on the following conditions:
* <ol>
* <li>sqlNode is an instance of SqlCall</li>
* <li>Operator is either one of TIME_FLOOR or FLOOR</li>
* <li>Number of operands in the call are 2</li>
* <li>First operand is a SimpleIdentifier representing __time</li>
* <li>If operator is TIME_FLOOR, the second argument is a literal, and can be converted to the Granularity class</li>
* <li>If operator is FLOOR, the second argument is a TimeUnit, and can be mapped using {@link TimeUnits}</li>
* </ol>
* <p>
* Since it is to be used primarily while parsing the SqlNode, it is wrapped in {@code convertSqlNodeToGranularityThrowingParseExceptions}
* This method is called during validation, which will catch any errors. It is then called again
* during conversion, at which time we assume the node is valid.
*
* @param sqlNode SqlNode representing a call to a function
*
* @return Granularity as intended by the function call
*
* @throws ParseException SqlNode cannot be converted a granularity
* @throws DruidException if SqlNode cannot be converted to a granularity
*/
public static Granularity convertSqlNodeToGranularity(SqlNode sqlNode) throws ParseException
@Nullable
public static Granularity convertSqlNodeToGranularity(SqlNode sqlNode)
{
if (sqlNode == null) {
return null;
}
if (sqlNode instanceof SqlLiteral) {
final Granularity retVal;
SqlLiteral literal = (SqlLiteral) sqlNode;
if (SqlLiteral.valueMatchesType(literal.getValue(), SqlTypeName.CHAR)) {
retVal = convertSqlLiteralCharToGranularity(literal);
} else {
throw makeInvalidPartitionByException(literal);
}
validateSupportedGranularityForPartitionedBy(sqlNode, retVal);
return retVal;
}
if (sqlNode instanceof SqlIdentifier) {
SqlIdentifier identifier = (SqlIdentifier) sqlNode;
final Granularity retVal;
retVal = convertSqlIdentiferToGranularity(identifier);
validateSupportedGranularityForPartitionedBy(sqlNode, retVal);
return retVal;
}
if (!(sqlNode instanceof SqlCall)) {
throw makeInvalidPartitionByException(sqlNode);
}
@@ -166,7 +212,9 @@ public class DruidSqlParserUtils
period = new Period(granularityString);
}
catch (IllegalArgumentException e) {
throw new ParseException(StringUtils.format("%s is an invalid period string", granularitySqlNode.toString()));
throw InvalidSqlInput.exception(
StringUtils.format("granularity[%s] is an invalid period string", granularitySqlNode.toString()),
sqlNode);
}
final PeriodGranularity retVal = new PeriodGranularity(period, null, null);
validateSupportedGranularityForPartitionedBy(sqlNode, retVal);
@@ -200,17 +248,51 @@ public class DruidSqlParserUtils
throw makeInvalidPartitionByException(sqlNode);
}
private static Granularity convertSqlLiteralCharToGranularity(SqlLiteral literal)
{
String value = literal.getValueAs(String.class);
try {
return Granularity.fromString(value);
}
catch (IllegalArgumentException e) {
try {
return new PeriodGranularity(new Period(value), null, null);
}
catch (Exception e2) {
throw makeInvalidPartitionByException(literal);
}
}
}
private static Granularity convertSqlIdentiferToGranularity(SqlIdentifier identifier)
{
if (identifier.names.isEmpty()) {
throw makeInvalidPartitionByException(identifier);
}
String value = identifier.names.get(0);
try {
return Granularity.fromString(value);
}
catch (IllegalArgumentException e) {
try {
return new PeriodGranularity(new Period(value), null, null);
}
catch (Exception e2) {
throw makeInvalidPartitionByException(identifier);
}
}
}
private static DruidException makeInvalidPartitionByException(SqlNode sqlNode)
{
return InvalidSqlInput.exception(
"Invalid granularity [%s] after PARTITIONED BY. "
+ "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()",
PARTITION_ERROR_MESSAGE,
sqlNode
);
}
/**
* This method validates and converts a {@link SqlNode} representing a query into an optimized list of intervals to
* Validates and converts a {@link SqlNode} representing a query into an optimized list of intervals to
* be used in creating an ingestion spec. If the sqlNode is an SqlLiteral of {@link #ALL}, returns a singleton list of
* "ALL". Otherwise, it converts and optimizes the query using {@link MoveTimeFiltersToIntervals} into a list of
* intervals which contain all valid values of time as per the query.
@@ -579,7 +661,10 @@ public class DruidSqlParserUtils
return String.valueOf(zonedTimestamp.toInstant().toEpochMilli());
}
public static void validateSupportedGranularityForPartitionedBy(SqlNode originalNode, Granularity granularity)
public static void validateSupportedGranularityForPartitionedBy(
@Nullable SqlNode originalNode,
Granularity granularity
)
{
if (!GranularityType.isStandard(granularity)) {
throw InvalidSqlInput.exception(
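Taken together, `convertSqlNodeToGranularity` now accepts string literals and bare identifiers in addition to `FLOOR`/`TIME_FLOOR` calls, trying `Granularity.fromString` first and falling back to an ISO 8601 `Period`. A sketch of the three new forms, assuming the methods as they appear in this diff (class name is illustrative):

```java
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.sql.calcite.parser.DruidSqlParserUtils;

public class GranularityConversionSketch
{
  public static void main(String[] args)
  {
    // Keyword-style string literal: PARTITIONED BY 'day'
    SqlNode keyword = SqlLiteral.createCharString("day", SqlParserPos.ZERO);
    Granularity byKeyword = DruidSqlParserUtils.convertSqlNodeToGranularity(keyword);

    // ISO 8601 period literal: PARTITIONED BY 'P1D'
    SqlNode period = SqlLiteral.createCharString("P1D", SqlParserPos.ZERO);
    Granularity byPeriod = DruidSqlParserUtils.convertSqlNodeToGranularity(period);

    // Bare identifier: PARTITIONED BY DAY
    SqlNode identifier = new SqlIdentifier("DAY", SqlParserPos.ZERO);
    Granularity byIdentifier = DruidSqlParserUtils.convertSqlNodeToGranularity(identifier);

    // All three resolve to the same standard DAY granularity. An unrecognized
    // string falls through both Granularity.fromString and the Period
    // constructor and surfaces as InvalidSqlInput with PARTITION_ERROR_MESSAGE.
    System.out.println(byKeyword.equals(byPeriod) && byPeriod.equals(byIdentifier));
  }
}
```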


@@ -27,10 +27,12 @@ import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlSpecialOperator;
import org.apache.calcite.sql.SqlWriter;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.util.ImmutableNullableList;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.List;
/**
* Extends the 'replace' call to hold custom parameters specific to Druid i.e. PARTITIONED BY and the PARTITION SPECS
@@ -45,29 +47,52 @@ public class DruidSqlReplace extends DruidSqlIngest
private final SqlNode replaceTimeQuery;
/**
* While partitionedBy and partitionedByStringForUnparse can be null as arguments to the constructor, this is
* disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
* errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
* custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
*/
public DruidSqlReplace(
public static DruidSqlReplace create(
@Nonnull SqlInsert insertNode,
@Nullable Granularity partitionedBy,
@Nullable String partitionedByStringForUnparse,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable SqlNode replaceTimeQuery,
@Nullable String exportFileFormat
)
{
super(
return new DruidSqlReplace(
insertNode.getParserPosition(),
(SqlNodeList) insertNode.getOperandList().get(0), // No better getter to extract this
insertNode.getTargetTable(),
insertNode.getSource(),
insertNode.getTargetColumnList(),
partitionedBy,
partitionedByStringForUnparse,
clusteredBy,
replaceTimeQuery,
exportFileFormat
);
}
/**
* While partitionedBy can be null as an argument to the constructor, this is
* disallowed (semantically) and the constructor performs checks to ensure that. This helps in producing friendly
* errors when the PARTITIONED BY custom clause is not present, and keeps its error separate from JavaCC/Calcite's
* custom errors which can be cryptic when someone accidentally forgets to explicitly specify the PARTITIONED BY clause
*/
public DruidSqlReplace(
SqlParserPos pos,
SqlNodeList keywords,
SqlNode targetTable,
SqlNode source,
SqlNodeList columnList,
@Nullable SqlGranularityLiteral partitionedBy,
@Nullable SqlNodeList clusteredBy,
@Nullable SqlNode replaceTimeQuery,
@Nullable String exportFileFormat
)
{
super(
pos,
keywords,
targetTable,
source,
columnList,
partitionedBy,
clusteredBy,
exportFileFormat
);
@@ -87,6 +112,15 @@ public class DruidSqlReplace extends DruidSqlIngest
return OPERATOR;
}
@Override
public List<SqlNode> getOperandList()
{
return ImmutableNullableList.<SqlNode>builder()
.addAll(super.getOperandList())
.add(replaceTimeQuery)
.build();
}
@Override
public void unparse(SqlWriter writer, int leftPrec, int rightPrec)
{
@@ -118,9 +152,9 @@ public class DruidSqlReplace extends DruidSqlIngest
getSource().unparse(writer, 0, 0);
writer.newlineAndIndent();
if (partitionedByStringForUnparse != null) {
if (getPartitionedBy() != null) {
writer.keyword("PARTITIONED BY");
writer.keyword(partitionedByStringForUnparse);
getPartitionedBy().unparse(writer, 0, 0);
}
if (getClusteredBy() != null) {


@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.sql.calcite.parser;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.NlsString;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.granularity.Granularity;
import javax.annotation.Nonnull;
/**
* Extends {@link SqlLiteral} to hold parameters for the PARTITIONED BY clause.
*/
public class SqlGranularityLiteral extends SqlLiteral
{
private String unparseString;
private Granularity granularity;
public SqlGranularityLiteral(
@Nonnull Granularity granularity,
@Nonnull String unparseString,
SqlParserPos pos)
{
super(new NlsString(unparseString, null, null), SqlTypeName.CHAR, pos);
this.granularity = granularity;
this.unparseString = unparseString;
}
@Override
public SqlGranularityLiteral clone(SqlParserPos pos)
{
return new SqlGranularityLiteral(granularity, unparseString, pos);
}
@Override
@Deprecated
public Object clone()
{
throw DruidException.defensive("Function is deprecated, please use clone(SqlParserPos) instead.");
}
@Nonnull
public Granularity getGranularity()
{
return granularity;
}
@Override
public void unparse(SqlWriter writer, int leftPrec, int rightPrec)
{
if (unparseString != null) {
writer.keyword(unparseString);
}
}
}
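A brief usage sketch for the new literal: it keeps the planner-facing `Granularity` and the original query text together, so unparsing reproduces what the user wrote (`SqlNode.toString()` routes through `unparse` in Calcite; class name is illustrative):

```java
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.sql.calcite.parser.SqlGranularityLiteral;

public class GranularityLiteralSketch
{
  public static void main(String[] args)
  {
    // Carries both halves the old Pair<Granularity, String> used to hold.
    SqlGranularityLiteral hour = new SqlGranularityLiteral(
        Granularities.HOUR, "'PT1H'", SqlParserPos.ZERO);

    System.out.println(hour.getGranularity()); // planner-facing value
    System.out.println(hour);                  // 'PT1H', via unparse()

    // clone(SqlParserPos) preserves both fields at a new position; the
    // deprecated no-arg clone() deliberately throws.
    SqlGranularityLiteral copy = hour.clone(SqlParserPos.ZERO);
    System.out.println(copy.getGranularity());
  }
}
```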


@@ -72,7 +72,7 @@ public abstract class IngestHandler extends QueryHandler
)
{
super(handlerContext, queryNode, explain);
this.ingestionGranularity = ingestNode.getPartitionedBy();
ingestionGranularity = ingestNode.getPartitionedBy() != null ? ingestNode.getPartitionedBy().getGranularity() : null;
handlerContext.hook().captureInsert(ingestNode);
}


@@ -47,6 +47,7 @@ import org.apache.druid.sql.calcite.external.ExternalDataSource;
import org.apache.druid.sql.calcite.external.Externals;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.parser.DruidSqlInsert;
import org.apache.druid.sql.calcite.parser.DruidSqlParserUtils;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.util.CalciteTests;
@@ -1016,6 +1017,47 @@ public class CalciteInsertDmlTest extends CalciteIngestionDmlTest
.verify();
}
@Test
public void testInsertPeriodFormGranularityWithClusteredBy()
{
// Tests correctness of the query when the PARTITIONED BY granularity is given in period form and a CLUSTERED BY clause is present
RowSignature targetRowSignature = RowSignature.builder()
.add("__time", ColumnType.LONG)
.add("floor_m1", ColumnType.FLOAT)
.add("dim1", ColumnType.STRING)
.add("ceil_m2", ColumnType.DOUBLE)
.build();
testIngestionQuery()
.sql(
"INSERT INTO druid.dst "
+ "SELECT __time, FLOOR(m1) as floor_m1, dim1, CEIL(m2) as ceil_m2 FROM foo "
+ "PARTITIONED BY P1D CLUSTERED BY 2, dim1, CEIL(m2)"
)
.expectTarget("dst", targetRowSignature)
.expectResources(dataSourceRead("foo"), dataSourceWrite("dst"))
.expectQuery(
newScanQueryBuilder()
.dataSource("foo")
.intervals(querySegmentSpec(Filtration.eternity()))
.columns("__time", "dim1", "v0", "v1")
.virtualColumns(
expressionVirtualColumn("v0", "floor(\"m1\")", ColumnType.FLOAT),
expressionVirtualColumn("v1", "ceil(\"m2\")", ColumnType.DOUBLE)
)
.orderBy(
ImmutableList.of(
new ScanQuery.OrderBy("v0", ScanQuery.Order.ASCENDING),
new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING),
new ScanQuery.OrderBy("v1", ScanQuery.Order.ASCENDING)
)
)
.context(queryContextWithGranularity(Granularities.DAY))
.build()
)
.expectLogicalPlanFrom("insertPartitionedByP1DWithClusteredBy")
.verify();
}
@Test
public void testInsertWithoutPartitionedByWithClusteredBy()
{
@@ -1130,8 +1172,7 @@ public class CalciteInsertDmlTest extends CalciteIngestionDmlTest
MatcherAssert.assertThat(
e,
invalidSqlIs(
"Invalid granularity ['invalid_granularity'] after PARTITIONED BY. "
+ "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
StringUtils.format(DruidSqlParserUtils.PARTITION_ERROR_MESSAGE, "'invalid_granularity'")
));
}
didTest = true;


@@ -604,10 +604,8 @@ public class CalciteReplaceDmlTest extends CalciteIngestionDmlTest
MatcherAssert.assertThat(
e,
invalidSqlIs(
"Invalid granularity ['invalid_granularity'] after PARTITIONED BY. "
+ "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
)
);
StringUtils.format(DruidSqlParserUtils.PARTITION_ERROR_MESSAGE, "'invalid_granularity'")
));
}
didTest = true;
}


@@ -21,6 +21,7 @@ package org.apache.druid.sql.calcite.parser;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.sql.SqlAsOperator;
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlIdentifier;
@@ -39,12 +40,14 @@ import org.apache.druid.error.DruidExceptionMatcher;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.sql.calcite.expression.TimeUnits;
import org.apache.druid.sql.calcite.expression.builtin.TimeFloorOperatorConversion;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.DruidTypeSystem;
import org.hamcrest.MatcherAssert;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.runners.Enclosed;
@@ -102,23 +105,25 @@ public class DruidSqlParserUtilsTest
public static Iterable<Object[]> constructorFeeder()
{
return ImmutableList.of(
new Object[]{TimeUnit.SECOND, Granularities.SECOND},
new Object[]{TimeUnit.MINUTE, Granularities.MINUTE},
new Object[]{TimeUnit.HOUR, Granularities.HOUR},
new Object[]{TimeUnit.DAY, Granularities.DAY},
new Object[]{TimeUnit.WEEK, Granularities.WEEK},
new Object[]{TimeUnit.MONTH, Granularities.MONTH},
new Object[]{TimeUnit.QUARTER, Granularities.QUARTER},
new Object[]{TimeUnit.YEAR, Granularities.YEAR}
new Object[]{TimeUnit.SECOND, TimeUnits.toPeriod(TimeUnitRange.SECOND), Granularities.SECOND},
new Object[]{TimeUnit.MINUTE, TimeUnits.toPeriod(TimeUnitRange.MINUTE), Granularities.MINUTE},
new Object[]{TimeUnit.HOUR, TimeUnits.toPeriod(TimeUnitRange.HOUR), Granularities.HOUR},
new Object[]{TimeUnit.DAY, TimeUnits.toPeriod(TimeUnitRange.DAY), Granularities.DAY},
new Object[]{TimeUnit.WEEK, TimeUnits.toPeriod(TimeUnitRange.WEEK), Granularities.WEEK},
new Object[]{TimeUnit.MONTH, TimeUnits.toPeriod(TimeUnitRange.MONTH), Granularities.MONTH},
new Object[]{TimeUnit.QUARTER, TimeUnits.toPeriod(TimeUnitRange.QUARTER), Granularities.QUARTER},
new Object[]{TimeUnit.YEAR, TimeUnits.toPeriod(TimeUnitRange.YEAR), Granularities.YEAR}
);
}
TimeUnit timeUnit;
Period period;
Granularity expectedGranularity;
public FloorToGranularityConversionTest(TimeUnit timeUnit, Granularity expectedGranularity)
public FloorToGranularityConversionTest(TimeUnit timeUnit, Period period, Granularity expectedGranularity)
{
this.timeUnit = timeUnit;
this.period = period;
this.expectedGranularity = expectedGranularity;
}
@@ -133,6 +138,39 @@ public class DruidSqlParserUtilsTest
Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(floorCall);
Assert.assertEquals(expectedGranularity, actualGranularity);
}
/**
* Tests clause like "PARTITIONED BY 'day'"
*/
@Test
public void testConvertSqlNodeToGranularityAsLiteral() throws ParseException
{
SqlNode sqlNode = SqlLiteral.createCharString(timeUnit.name(), SqlParserPos.ZERO);
Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
Assert.assertEquals(expectedGranularity, actualGranularity);
}
/**
* Tests clause like "PARTITIONED BY PT1D"
*/
@Test
public void testConvertSqlNodeToPeriodFormGranularityAsIdentifier() throws ParseException
{
SqlNode sqlNode = new SqlIdentifier(period.toString(), SqlParserPos.ZERO);
Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
Assert.assertEquals(expectedGranularity, actualGranularity);
}
/**
* Tests clause like "PARTITIONED BY 'PT1D'"
*/
@Test
public void testConvertSqlNodeToPeriodFormGranularityAsLiteral() throws ParseException
{
SqlNode sqlNode = SqlLiteral.createCharString(period.toString(), SqlParserPos.ZERO);
Granularity actualGranularity = DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode);
Assert.assertEquals(expectedGranularity, actualGranularity);
}
}
/**
@@ -305,28 +343,6 @@ public class DruidSqlParserUtilsTest
public static class FloorToGranularityConversionErrorsTest
{
/**
* Tests clause like "PARTITIONED BY 'day'"
*/
@Test
public void testConvertSqlNodeToGranularityWithIncorrectNode()
{
SqlNode sqlNode = SqlLiteral.createCharString("day", SqlParserPos.ZERO);
DruidException e = Assert.assertThrows(
DruidException.class,
() -> DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode)
);
MatcherAssert.assertThat(
e,
DruidExceptionMatcher
.invalidSqlInput()
.expectMessageIs(
"Invalid granularity ['day'] after PARTITIONED BY. "
+ "Expected HOUR, DAY, MONTH, YEAR, ALL TIME, FLOOR() or TIME_FLOOR()"
)
);
}
/**
* Tests clause like "PARTITIONED BY CEIL(__time TO DAY)"
*/
@@ -424,11 +440,11 @@ public class DruidSqlParserUtilsTest
args.add(new SqlIdentifier("__time", SqlParserPos.ZERO));
args.add(SqlLiteral.createCharString("abc", SqlParserPos.ZERO));
final SqlNode sqlNode = TimeFloorOperatorConversion.SQL_FUNCTION.createCall(args);
ParseException e = Assert.assertThrows(
ParseException.class,
DruidException e = Assert.assertThrows(
DruidException.class,
() -> DruidSqlParserUtils.convertSqlNodeToGranularityThrowingParseExceptions(sqlNode)
);
Assert.assertEquals("'abc' is an invalid period string", e.getMessage());
Assert.assertEquals("granularity['abc'] is an invalid period string", e.getMessage());
}
}


@@ -1,3 +1,3 @@
LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
LogicalProject(inputs=[0..2])
ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"CalciteIngestionDmlTest$TestFileInputSource","files":["/tmp/foo.csv"]},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])


@@ -1,3 +1,3 @@
LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
LogicalProject(inputs=[0..2])
ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"http","uris":["http://foo.com/bar.csv"],"httpAuthenticationUsername":"bob","httpAuthenticationPassword":{"type":"default","password":"secret"}},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])


@@ -1,3 +1,3 @@
LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
LogicalProject(inputs=[0..2])
ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"inline","data":"a,b,1\nc,d,2\n"},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])


@@ -0,0 +1,4 @@
LogicalInsert(target=[druid.dst], partitionedBy=[P1D], clusteredBy=[2, `dim1`, CEIL(`m2`)])
LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC])
LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1, CEIL($6)]])
LogicalTableScan(table=[[druid, foo]])


@@ -1,4 +1,4 @@
LogicalInsert(target=[druid.dst], partitionedBy=[{type=period, period=P1D, timeZone=UTC, origin=null}], clusteredBy=[2, `dim1`, CEIL(`m2`)])
LogicalInsert(target=[druid.dst], partitionedBy=[FLOOR(`__TIME` TO DAY)], clusteredBy=[2, `dim1`, CEIL(`m2`)])
LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC])
LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1, CEIL($6)]])
LogicalTableScan(table=[[druid, foo]])


@@ -1,3 +1,3 @@
LogicalInsert(target=[druid.dst], partitionedBy=[{type=period, period=PT1H, timeZone=UTC, origin=null}], clusteredBy=[<none>])
LogicalInsert(target=[druid.dst], partitionedBy=[`TIME_FLOOR`(`__TIME`, 'PT1H')], clusteredBy=[<none>])
LogicalProject(inputs=[0], exprs=[[FLOOR($5), $1]])
LogicalTableScan(table=[[druid, foo]])


@@ -1,3 +1,3 @@
LogicalInsert(target=[dst], partitionedBy=[AllGranularity], clusteredBy=[<none>])
LogicalInsert(target=[dst], partitionedBy=[ALL TIME], clusteredBy=[<none>])
LogicalProject(inputs=[0..2])
ExternalTableScan(dataSource=[{"type":"external","inputSource":{"type":"local","files":["/tmp/foo.csv","/tmp/bar.csv"]},"inputFormat":{"type":"csv","columns":["x","y","z"]},"signature":[{"name":"x","type":"STRING"},{"name":"y","type":"STRING"},{"name":"z","type":"LONG"}]}])