-
Notifications
You must be signed in to change notification settings - Fork 7.8k
Expand file tree
/
Copy pathlab1_2_my_notes.sql
More file actions
129 lines (129 loc) · 4.79 KB
/
lab1_2_my_notes.sql
File metadata and controls
129 lines (129 loc) · 4.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/* STEP 0: The skeleton is this outer join query that joins NULL player data from 1995
with today's season data. Both tables don't have duplicate players */
-- WITH yesterday AS (
-- SELECT *
-- FROM players
-- WHERE current_season = 1995
-- ),
-- today AS (
-- SELECT *
-- FROM player_seasons
-- WHERE season = 1996
-- )
-- SELECT *
-- FROM today t
-- FULL OUTER JOIN yesterday y ON t.player_name = y.player_name;
/*------------------------------------------------------------------------------*/
/* STEP 1: populate the players table with data of 1996 */
INSERT into players WITH yesterday AS (
SELECT *
FROM players
WHERE current_season = 1995
),
today AS (
SELECT *
FROM player_seasons
WHERE season = 1996
)
SELECT COALESCE (t.player_name, y.player_name) AS player_name,
COALESCE (t.height, y.height) AS height,
COALESCE (t.college, y.college) AS college,
COALESCE (t.country, y.country) AS country,
COALESCE(t.draft_year, y.draft_year) AS draft_year,
COALESCE(t.draft_round, y.draft_round) AS draft_round,
COALESCE(t.draft_number, y.draft_number) AS draft_number,
CASE
/* create a season_stats column in the final query result using CASE
and y.seasons and t.season values.*/
WHEN y.seasons IS NULL THEN ARRAY [ROW(
t.season,
t.gp,
t.pts,
t.reb,
t.ast,
t.weight
)::season_stats]
WHEN t.season IS NOT NULL THEN y.seasons || ARRAY [ROW(
t.season,
t.gp,
t.pts,
t.reb,
t.ast,
t.weight
)::season_stats]
ELSE y.seasons
END AS season_stats,
COALESCE(t.season, y.current_season + 1) as current_season
FROM today t
FULL OUTER JOIN yesterday y ON t.player_name = y.player_name;
/* ---------------------------------------------------------------------------------*/
/* STEP 2: we repeat the previous code with today data of 1997 */
INSERT into players WITH yesterday AS (
SELECT *
FROM players
WHERE current_season = 2000
),
today AS (
SELECT *
FROM player_seasons
WHERE season = 2001
)
SELECT -- we don't need to repeat the constant info in two columns in case the player played
-- in the previous season and today's season
COALESCE (t.player_name, y.player_name) AS player_name,
COALESCE (t.height, y.height) AS height,
COALESCE (t.college, y.college) AS college,
COALESCE (t.country, y.country) AS country,
COALESCE(t.draft_year, y.draft_year) AS draft_year,
COALESCE(t.draft_round, y.draft_round) AS draft_round,
COALESCE(t.draft_number, y.draft_number) AS draft_number,
CASE
/* create a season_stats column in the final query result using CASE
and y.seasons and t.season values.*/
WHEN y.seasons IS NULL THEN ARRAY [ROW(
t.season,
t.gp,
t.pts,
t.reb,
t.ast,
t.weight
)::season_stats]
WHEN t.season IS NOT NULL THEN y.seasons || ARRAY [ROW(
t.season,
t.gp,
t.pts,
t.reb,
t.ast,
t.weight
)::season_stats]
ELSE y.seasons
END AS season_stats,
COALESCE(t.season, y.current_season + 1) as current_season
FROM today t
FULL OUTER JOIN yesterday y ON t.player_name = y.player_name;
/* Very important note: The previous query ADDS to the players table new rows
generated by the outer join. It doesn't assign to the players table those new
rows. This is why after executing the query, we will have duplicated rows for
players who played in both 1996 and 1997. Example: The player Andrew Lang. This
player has two rows now:
one with seasons value: {"(1996,52,5.3,5.3,0.5,275)"} and current_season 1996
another with seasons value: {"(1996,52,5.3,5.3,0.5,275)", "(1997,57,2.7,2.7,0.3,270)"}
and current_season 1997.
This is why you might get an error complaining about duplicated keys when executing
the previous query. Because there will be repeated (player_name,current_season)
values.
This is also why in the yesterday table you specify the current_season value.
Now it makes sense!
I saved the results of the "inside" query into a table called players_till_1997
This table has 527 rows. Whereas, when we insert those new rows to the players
table, the players table now has 968 rows which makes sense because the players
table which had only data from the season 1996 had 441 rows (441+527=968).
*/
/* STEP3: Execute the previous query with:
yesterday 1997 and today 1998
yesterday 1998 and today 1999
yesterday 1999 and today 2000
yesterday 2000 and today 2001
Now we have the players table with highest current_season value
of 2001*/
/*-------------------------------------------------------------*/