From 867c636629ce3a758fa6fc8e6410c228f4db43b5 Mon Sep 17 00:00:00 2001 From: Katya Macedo <38017980+ektravel@users.noreply.github.com> Date: Thu, 25 Jan 2024 11:53:39 -0600 Subject: [PATCH] Document pivot and unpivot operators (#15669) --- docs/querying/sql-operators.md | 7 ++ docs/querying/sql.md | 125 +++++++++++++++++++++++++++++++-- website/.spelling | 4 ++ 3 files changed, 129 insertions(+), 7 deletions(-) diff --git a/docs/querying/sql-operators.md b/docs/querying/sql-operators.md index da295821ecf..ebf68099019 100644 --- a/docs/querying/sql-operators.md +++ b/docs/querying/sql-operators.md @@ -108,3 +108,10 @@ Also see the [CONCAT function](sql-scalar.md#string-functions). |`x NOT IN (values)`|True if _x_ is not one of the listed values| |`x IN (subquery)`|True if _x_ is returned by the subquery. This will be translated into a join; see [Query translation](sql-translation.md) for details.| |`x NOT IN (subquery)`|True if _x_ is not returned by the subquery. This will be translated into a join; see [Query translation](sql-translation.md) for details.| + +## Other operators + +|Operator|Description| +|--------|-----------| +|`PIVOT (aggregation_function(column_to_aggregate) FOR column_with_values_to_pivot IN (pivoted_column1 [, pivoted_column2 ...]))`|Carries out an aggregation and transforms rows into columns in the output.| +|`UNPIVOT (values_column FOR names_column IN (unpivoted_column1 [, unpivoted_column2 ... ]))`|Transforms existing column values into rows.| \ No newline at end of file diff --git a/docs/querying/sql.md b/docs/querying/sql.md index ce438cbddc9..6da894e4e67 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -57,6 +57,8 @@ Druid SQL supports SELECT queries with the following structure: [ WITH tableName [ ( column1, column2, ... ) ] AS ( query ) ] SELECT [ ALL | DISTINCT ] { * | exprs } FROM { | () | [ INNER | LEFT ] JOIN ON condition } +[PIVOT (aggregation_function(column_to_aggregate) FOR column_with_values_to_pivot IN (pivoted_column1 [, pivoted_column2 ...]))] +[UNPIVOT (values_column FOR names_column IN (unpivoted_column1 [, unpivoted_column2 ... ]))] [ CROSS JOIN UNNEST(source_expression) as table_alias_name(column_alias_name) ] [ WHERE expr ] [ GROUP BY [ exprs | GROUPING SETS ( (exprs), ... ) | ROLLUP (exprs) | CUBE (exprs) ] ] @@ -85,6 +87,121 @@ FROM clause, metadata tables are not considered datasources. They exist only in For more information about table, lookup, query, and join datasources, refer to the [Datasources](datasource.md) documentation. +## PIVOT + +:::info +The PIVOT operator is an [experimental feature](../development/experimental-features.md). +::: + +The PIVOT operator carries out an aggregation and transforms rows into columns in the output. + +The following is the general syntax for the PIVOT operator. Note that the PIVOT operator is enclosed in parentheses and forms part of the FROM clause of the query. + +```sql +PIVOT (aggregation_function(column_to_aggregate) + FOR column_with_values_to_pivot + IN (pivoted_column1 [, pivoted_column2 ...]) +) +``` + +PIVOT syntax parameters: + +* `aggregation_function`: An aggregation function, such as SUM, COUNT, MIN, MAX, or AVG. +* `column_to_aggregate`: The source column to be aggregated. +* `column_with_values_to_pivot`: The column that contains values for the pivoted column names. +* `pivoted_columnN`: The list of values to pivot into headers in the output. + +The following example demonstrates how to transform `cityName` values into column headers `ba_sum_deleted` and `ny_sum_deleted`: + +```sql +SELECT user, channel, ba_sum_deleted, ny_sum_deleted +FROM "wikipedia" +PIVOT (SUM(deleted) AS "sum_deleted" FOR "cityName" IN ( 'Buenos Aires' AS ba, 'New York' AS ny)) +WHERE ba_sum_deleted IS NOT NULL OR ny_sum_deleted IS NOT NULL +LIMIT 15 +``` + +
+ View results + +|`user`|`channel`|`ba_sum_deleted`|`ny_sum_deleted`| +|------|---------|----------------|----------------| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|69.86.6.150|`#en.wikipedia`|null|1| +|190.123.145.147|`#es.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|16|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|190.192.179.192|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| +|181.230.118.178|`#en.wikipedia`|0|null| + +
+ +## UNPIVOT + +:::info +The UNPIVOT operator is an [experimental feature](../development/experimental-features.md). +::: + +The UNPIVOT operator transforms existing column values into rows. +Note that UNPIVOT isn't the exact reverse operation of PIVOT. The PIVOT operator carries out an aggregation and merges rows as needed. UNPIVOT doesn't reproduce the original rows that have been merged. + +The following is the general syntax for the UNPIVOT operator. Note that the UNPIVOT operator is enclosed in parentheses and forms part of the FROM clause of the query. + +```sql +UNPIVOT (values_column + FOR names_column + IN (unpivoted_column1 [, unpivoted_column2 ... ]) +) +``` + +UNPIVOT syntax parameters: + +* `values_column`: The column that contains the values of the unpivoted columns. +* `names_column`: The column that contains the names of the unpivoted columns. +* `unpivoted_columnN`: The list of columns to transform into rows in the output. + +The following example demonstrates how to transform the columns `added` and `deleted` into row values that correspond to a particular `channel`: + +```sql +SELECT channel, user, action, SUM(changes) AS total_changes +FROM "wikipedia" +UNPIVOT ( changes FOR action IN ("added", "deleted") ) +WHERE channel LIKE '#ar%' +GROUP BY channel, user, action +LIMIT 15 +``` + +
+ View results + +|`channel`|`user`|`action`|`total_changes`| +|---------|------|--------|---------------| +|`#ar.wikipedia`|156.202.189.223|added|0| +|`#ar.wikipedia`|156.202.189.223|deleted|30| +|`#ar.wikipedia`|156.202.76.160|added|0| +|`#ar.wikipedia`|156.202.76.160|deleted|0| +|`#ar.wikipedia`|156.212.124.165|added|451| +|`#ar.wikipedia`|156.212.124.165|deleted|0| +|`#ar.wikipedia`|160.166.147.167|added|1| +|`#ar.wikipedia`|160.166.147.167|deleted|0| +|`#ar.wikipedia`|185.99.32.50|added|1| +|`#ar.wikipedia`|185.99.32.50|deleted|0| +|`#ar.wikipedia`|197.18.109.148|added|0| +|`#ar.wikipedia`|197.18.109.148|deleted|24| +|`#ar.wikipedia`|`2001:16A2:3C7:6C00:917E:AD28:FAD3:FD5C`|added|1| +|`#ar.wikipedia`|`2001:16A2:3C7:6C00:917E:AD28:FAD3:FD5C`|deleted|0| +|`#ar.wikipedia`|41.108.33.83|added|0| + +
+ ## UNNEST The UNNEST clause unnests ARRAY typed values. The source for UNNEST can be an array type column, or an input that's been transformed into an array, such as with helper functions like [`MV_TO_ARRAY`](./sql-multivalue-string-functions.md) or [`ARRAY`](./sql-array-functions.md). @@ -139,7 +256,6 @@ WHERE stringDim IN ('1', '2', '3') Note that explicit type casting does not lead to significant performance improvement when comparing strings and numbers involving numeric dimensions since numeric dimensions are not indexed. - ## GROUP BY The GROUP BY clause refers to columns in the FROM table. Using GROUP BY, DISTINCT, or any aggregation functions will @@ -283,9 +399,4 @@ To solve this issue, explicitly provide the type of the dynamic parameter using ``` SELECT * FROM druid.foo WHERE dim1 like CONCAT('%', CAST (? AS VARCHAR), '%') -``` - - - - - +``` \ No newline at end of file diff --git a/website/.spelling b/website/.spelling index 67be33c2d99..56ad2a69217 100644 --- a/website/.spelling +++ b/website/.spelling @@ -778,6 +778,10 @@ APPROX_COUNT_DISTINCT_DS_THETA APPROX_QUANTILE_DS DS_QUANTILES_SKETCH APPROX_QUANTILE_FIXED_BUCKETS +# Operators +pivoted +UNPIVOT +unpivoted # File specific overrides 100x _common