007g_PivotInSQL(SQL)

Loading...

ScaDaMaLe Course site and book

SQL Pivoting since Spark 2.4

SQL Pivot: Converting Rows to Columns

This is from the following blogpost:

Load Data

Create tables and load temperature data

CREATE OR REPLACE TEMPORARY VIEW high_temps
  USING csv
  OPTIONS (path "/databricks-datasets/weather/high_temps", header "true", mode "FAILFAST")
OK
CREATE OR REPLACE TEMPORARY VIEW low_temps
  USING csv
  OPTIONS (path "/databricks-datasets/weather/low_temps", header "true", mode "FAILFAST")
OK

Pivoting in SQL

Getting the monthly average high temperatures with month as columns and year as rows.

SELECT * FROM (
  SELECT year(date) year, month(date) month, temp
  FROM high_temps
  WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
  CAST(avg(temp) AS DECIMAL(4, 1))
  FOR month in (
    1 JAN, 2 FEB, 3 MAR, 4 APR, 5 MAY, 6 JUN,
    7 JUL, 8 AUG, 9 SEP, 10 OCT, 11 NOV, 12 DEC
  )
)
ORDER BY year DESC
 
 
year
JAN
FEB
MAR
APR
MAY
JUN
JUL
AUG
SEP
OCT
NOV
DEC
1
2
3
4
2018
49.7
45.8
54
58.6
70.8
71.9
82.8
79.1
null
null
null
null
2017
43.7
46.6
51.5
57.3
67
72.1
78.3
81.5
73.8
61.1
51.3
45.5
2016
49.1
53.6
56.4
65.9
68.8
73.1
76
79.5
69.6
60.5
56
41.9
2015
50.3
54.5
57.9
59.9
68
78.9
82.6
79
68.5
63.6
49.4
47.1

Showing all 4 rows.

Pivoting with Multiple Aggregate Expressions

Getting monthly average and maximum high temperatures with month as columns and year as rows.

SELECT * FROM (
  SELECT year(date) year, month(date) month, temp
  FROM high_temps
  WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
  CAST(avg(temp) AS DECIMAL(4, 1)) avg, max(temp) max
  FOR month in (6 JUN, 7 JUL, 8 AUG, 9 SEP)
)
ORDER BY year DESC
 
 
year
JUN_avg
JUN_max
JUL_avg
JUL_max
AUG_avg
AUG_max
SEP_avg
SEP_max
1
2
3
4
2018
71.9
88
82.8
94
79.1
94
null
null
2017
72.1
96
78.3
87
81.5
94
73.8
90
2016
73.1
93
76
89
79.5
95
69.6
78
2015
78.9
92
82.6
95
79
92
68.5
81

Showing all 4 rows.

Pivoting with Multiple Grouping Columns

Getting monthly average high and average low temperatures with month as columns and (year, hi/lo) as rows.

SELECT * FROM (
  SELECT year(date) year, month(date) month, temp, flag `H/L`
  FROM (
    SELECT date, temp, 'H' as flag
    FROM high_temps
    UNION ALL
    SELECT date, temp, 'L' as flag
    FROM low_temps
  )
  WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
  CAST(avg(temp) AS DECIMAL(4, 1))
  FOR month in (6 JUN, 7 JUL, 8 AUG, 9 SEP)
)
ORDER BY year DESC, `H/L` ASC
 
 
year
H/L
JUN
JUL
AUG
SEP
1
2
3
4
5
6
7
8
2018
H
71.9
82.8
79.1
null
2018
L
53.4
58.5
58.5
null
2017
H
72.1
78.3
81.5
73.8
2017
L
53.7
56.3
59
55.6
2016
H
73.1
76
79.5
69.6
2016
L
53.9
57.6
57.9
52.9
2015
H
78.9
82.6
79
68.5
2015
L
56.4
59.9
58.5
52.5

Showing all 8 rows.

Pivoting with Multiple Pivot Columns

Getting monthly average high and average low temperatures with (month, hi/lo) as columns and year as rows.

SELECT * FROM (
  SELECT year(date) year, month(date) month, temp, flag
  FROM (
    SELECT date, temp, 'H' as flag
    FROM high_temps
    UNION ALL
    SELECT date, temp, 'L' as flag
    FROM low_temps
  )
  WHERE date BETWEEN DATE '2015-01-01' AND DATE '2018-08-31'
)
PIVOT (
  CAST(avg(temp) AS DECIMAL(4, 1))
  FOR (month, flag) in (
    (6, 'H') JUN_hi, (6, 'L') JUN_lo,
    (7, 'H') JUL_hi, (7, 'L') JUL_lo,
    (8, 'H') AUG_hi, (8, 'L') AUG_lo,
    (9, 'H') SEP_hi, (9, 'L') SEP_lo
  )
)
ORDER BY year DESC
 
 
year
JUN_hi
JUN_lo
JUL_hi
JUL_lo
AUG_hi
AUG_lo
SEP_hi
SEP_lo
1
2
3
4
2018
71.9
53.4
82.8
58.5
79.1
58.5
null
null
2017
72.1
53.7
78.3
56.3
81.5
59
73.8
55.6
2016
73.1
53.9
76
57.6
79.5
57.9
69.6
52.9
2015
78.9
56.4
82.6
59.9
79
58.5
68.5
52.5

Showing all 4 rows.