SQL Pivoting since Spark 2.4
SQL Pivot: Converting Rows to Columns
This is from the following blogpost: - https://databricks.com/blog/2018/11/01/sql-pivot-converting-rows-to-columns.html
This is a useful trick to know when having to do ETL before exploring datasets that need row to column conversions.
Load Data
Create tables and load temperature data
CREATE OR REPLACE TEMPORARY VIEW high_temps
USING csv
OPTIONS (path "/databricks-datasets/weather/high_temps", header "true", mode "FAILFAST")
CREATE OR REPLACE TEMPORARY VIEW low_temps
USING csv
OPTIONS (path "/databricks-datasets/weather/low_temps", header "true", mode "FAILFAST")
Pivoting in SQL
Getting the monthly average high temperatures with month as columns and year as rows.
SELECT * FROM (
SELECT year(date) year, month(date) month, temp
FROM high_temps
WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
CAST(avg(temp) AS DECIMAL(4, 1))
FOR month in (
1 JAN, 2 FEB, 3 MAR, 4 APR, 5 MAY, 6 JUN,
7 JUL, 8 AUG, 9 SEP, 10 OCT, 11 NOV, 12 DEC
)
)
ORDER BY year DESC
year | JAN | FEB | MAR | APR | MAY | JUN | JUL | AUG | SEP | OCT | NOV | DEC |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2018.0 | 49.7 | 45.8 | 54.0 | 58.6 | 70.8 | 71.9 | 82.8 | 79.1 | null | null | null | null |
2017.0 | 43.7 | 46.6 | 51.5 | 57.3 | 67.0 | 72.1 | 78.3 | 81.5 | 73.8 | 61.1 | 51.3 | 45.5 |
2016.0 | 49.1 | 53.6 | 56.4 | 65.9 | 68.8 | 73.1 | 76.0 | 79.5 | 69.6 | 60.5 | 56.0 | 41.9 |
2015.0 | 50.3 | 54.5 | 57.9 | 59.9 | 68.0 | 78.9 | 82.6 | 79.0 | 68.5 | 63.6 | 49.4 | 47.1 |
Pivoting with Multiple Aggregate Expressions
Getting monthly average and maximum high temperatures with month as columns and year as rows.
SELECT * FROM (
SELECT year(date) year, month(date) month, temp
FROM high_temps
WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
CAST(avg(temp) AS DECIMAL(4, 1)) avg, max(temp) max
FOR month in (6 JUN, 7 JUL, 8 AUG, 9 SEP)
)
ORDER BY year DESC
year | JUN_avg | JUN_max | JUL_avg | JUL_max | AUG_avg | AUG_max | SEP_avg | SEP_max |
---|---|---|---|---|---|---|---|---|
2018.0 | 71.9 | 88 | 82.8 | 94 | 79.1 | 94 | null | null |
2017.0 | 72.1 | 96 | 78.3 | 87 | 81.5 | 94 | 73.8 | 90 |
2016.0 | 73.1 | 93 | 76.0 | 89 | 79.5 | 95 | 69.6 | 78 |
2015.0 | 78.9 | 92 | 82.6 | 95 | 79.0 | 92 | 68.5 | 81 |
Pivoting with Multiple Grouping Columns
Getting monthly average high and average low temperatures with month as columns and (year, hi/lo) as rows.
SELECT * FROM (
SELECT year(date) year, month(date) month, temp, flag `H/L`
FROM (
SELECT date, temp, 'H' as flag
FROM high_temps
UNION ALL
SELECT date, temp, 'L' as flag
FROM low_temps
)
WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
CAST(avg(temp) AS DECIMAL(4, 1))
FOR month in (6 JUN, 7 JUL, 8 AUG, 9 SEP)
)
ORDER BY year DESC, `H/L` ASC
year | H/L | JUN | JUL | AUG | SEP |
---|---|---|---|---|---|
2018.0 | H | 71.9 | 82.8 | 79.1 | null |
2018.0 | L | 53.4 | 58.5 | 58.5 | null |
2017.0 | H | 72.1 | 78.3 | 81.5 | 73.8 |
2017.0 | L | 53.7 | 56.3 | 59.0 | 55.6 |
2016.0 | H | 73.1 | 76.0 | 79.5 | 69.6 |
2016.0 | L | 53.9 | 57.6 | 57.9 | 52.9 |
2015.0 | H | 78.9 | 82.6 | 79.0 | 68.5 |
2015.0 | L | 56.4 | 59.9 | 58.5 | 52.5 |
Pivoting with Multiple Pivot Columns
Getting monthly average high and average low temperatures with (month, hi/lo) as columns and year as rows.
SELECT * FROM (
SELECT year(date) year, month(date) month, temp, flag
FROM (
SELECT date, temp, 'H' as flag
FROM high_temps
UNION ALL
SELECT date, temp, 'L' as flag
FROM low_temps
)
WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
CAST(avg(temp) AS DECIMAL(4, 1))
FOR (month, flag) in (
(6, 'H') JUN_hi, (6, 'L') JUN_lo,
(7, 'H') JUL_hi, (7, 'L') JUL_lo,
(8, 'H') AUG_hi, (8, 'L') AUG_lo,
(9, 'H') SEP_hi, (9, 'L') SEP_lo
)
)
ORDER BY year DESC
year | JUN_hi | JUN_lo | JUL_hi | JUL_lo | AUG_hi | AUG_lo | SEP_hi | SEP_lo |
---|---|---|---|---|---|---|---|---|
2018.0 | 71.9 | 53.4 | 82.8 | 58.5 | 79.1 | 58.5 | null | null |
2017.0 | 72.1 | 53.7 | 78.3 | 56.3 | 81.5 | 59.0 | 73.8 | 55.6 |
2016.0 | 73.1 | 53.9 | 76.0 | 57.6 | 79.5 | 57.9 | 69.6 | 52.9 |
2015.0 | 78.9 | 56.4 | 82.6 | 59.9 | 79.0 | 58.5 | 68.5 | 52.5 |