Skip to content

Commit a7175d8

Browse files
committed
lec 38
1 parent ef2e1ef commit a7175d8

3 files changed

Lines changed: 1190 additions & 73 deletions

File tree

lec/lec36/lec36.ipynb

Lines changed: 781 additions & 45 deletions
Large diffs are not rendered by default.

lec/lec37/lec37.ipynb

Lines changed: 229 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 2,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -21,7 +21,7 @@
2121
},
2222
{
2323
"cell_type": "code",
24-
"execution_count": null,
24+
"execution_count": 3,
2525
"metadata": {},
2626
"outputs": [],
2727
"source": [
@@ -44,7 +44,7 @@
4444
},
4545
{
4646
"cell_type": "code",
47-
"execution_count": null,
47+
"execution_count": 4,
4848
"metadata": {},
4949
"outputs": [],
5050
"source": [
@@ -72,73 +72,263 @@
7272
},
7373
{
7474
"cell_type": "code",
75-
"execution_count": null,
75+
"execution_count": 5,
7676
"metadata": {},
77-
"outputs": [],
77+
"outputs": [
78+
{
79+
"data": {
80+
"text/html": [
81+
"<table border=\"1\" class=\"dataframe\">\n",
82+
" <thead>\n",
83+
" <tr>\n",
84+
" <th>Year</th> <th>Major</th>\n",
85+
" </tr>\n",
86+
" </thead>\n",
87+
" <tbody>\n",
88+
" <tr>\n",
89+
" <td>Second</td> <td>Declared</td>\n",
90+
" </tr>\n",
91+
" <tr>\n",
92+
" <td>Second</td> <td>Declared</td>\n",
93+
" </tr>\n",
94+
" <tr>\n",
95+
" <td>Second</td> <td>Declared</td>\n",
96+
" </tr>\n",
97+
" </tbody>\n",
98+
"</table>\n",
99+
"<p>... (97 rows omitted)</p>"
100+
],
101+
"text/plain": [
102+
"<IPython.core.display.HTML object>"
103+
]
104+
},
105+
"metadata": {},
106+
"output_type": "display_data"
107+
}
108+
],
78109
"source": [
79110
"students.show(3)"
80111
]
81112
},
82113
{
83114
"cell_type": "code",
84-
"execution_count": null,
115+
"execution_count": 7,
85116
"metadata": {},
86-
"outputs": [],
117+
"outputs": [
118+
{
119+
"data": {
120+
"text/html": [
121+
"<table border=\"1\" class=\"dataframe\">\n",
122+
" <thead>\n",
123+
" <tr>\n",
124+
" <th>Year</th> <th>count</th>\n",
125+
" </tr>\n",
126+
" </thead>\n",
127+
" <tbody>\n",
128+
" <tr>\n",
129+
" <td>Second</td> <td>60 </td>\n",
130+
" </tr>\n",
131+
" <tr>\n",
132+
" <td>Third </td> <td>40 </td>\n",
133+
" </tr>\n",
134+
" </tbody>\n",
135+
"</table>"
136+
],
137+
"text/plain": [
138+
"Year | count\n",
139+
"Second | 60\n",
140+
"Third | 40"
141+
]
142+
},
143+
"execution_count": 7,
144+
"metadata": {},
145+
"output_type": "execute_result"
146+
}
147+
],
87148
"source": [
88-
"students.pivot('Major', 'Year')"
149+
"students.group('Year')"
89150
]
90151
},
91152
{
92153
"cell_type": "code",
93-
"execution_count": null,
154+
"execution_count": 8,
94155
"metadata": {},
95-
"outputs": [],
156+
"outputs": [
157+
{
158+
"data": {
159+
"text/html": [
160+
"<table border=\"1\" class=\"dataframe\">\n",
161+
" <thead>\n",
162+
" <tr>\n",
163+
" <th>Major</th> <th>count</th>\n",
164+
" </tr>\n",
165+
" </thead>\n",
166+
" <tbody>\n",
167+
" <tr>\n",
168+
" <td>Declared </td> <td>30 </td>\n",
169+
" </tr>\n",
170+
" <tr>\n",
171+
" <td>Undeclared</td> <td>30 </td>\n",
172+
" </tr>\n",
173+
" </tbody>\n",
174+
"</table>"
175+
],
176+
"text/plain": [
177+
"Major | count\n",
178+
"Declared | 30\n",
179+
"Undeclared | 30"
180+
]
181+
},
182+
"execution_count": 8,
183+
"metadata": {},
184+
"output_type": "execute_result"
185+
}
186+
],
96187
"source": [
97-
"# Verify: 60% of students are Second years, 40% are Third years\n",
98-
"60 / (60 + 40)"
188+
"students.where('Year', 'Second').group('Major')"
99189
]
100190
},
101191
{
102192
"cell_type": "code",
103-
"execution_count": null,
193+
"execution_count": 9,
104194
"metadata": {},
105-
"outputs": [],
195+
"outputs": [
196+
{
197+
"data": {
198+
"text/html": [
199+
"<table border=\"1\" class=\"dataframe\">\n",
200+
" <thead>\n",
201+
" <tr>\n",
202+
" <th>Major</th> <th>count</th>\n",
203+
" </tr>\n",
204+
" </thead>\n",
205+
" <tbody>\n",
206+
" <tr>\n",
207+
" <td>Declared </td> <td>32 </td>\n",
208+
" </tr>\n",
209+
" <tr>\n",
210+
" <td>Undeclared</td> <td>8 </td>\n",
211+
" </tr>\n",
212+
" </tbody>\n",
213+
"</table>"
214+
],
215+
"text/plain": [
216+
"Major | count\n",
217+
"Declared | 32\n",
218+
"Undeclared | 8"
219+
]
220+
},
221+
"execution_count": 9,
222+
"metadata": {},
223+
"output_type": "execute_result"
224+
}
225+
],
106226
"source": [
107-
"# Verify: 50% of Second years have Declared\n",
108-
"30 / 60"
227+
"students.where('Year', 'Third').group('Major')"
109228
]
110229
},
111230
{
112231
"cell_type": "code",
113-
"execution_count": null,
232+
"execution_count": 10,
114233
"metadata": {},
115-
"outputs": [],
234+
"outputs": [
235+
{
236+
"data": {
237+
"text/plain": [
238+
"0.8"
239+
]
240+
},
241+
"execution_count": 10,
242+
"metadata": {},
243+
"output_type": "execute_result"
244+
}
245+
],
116246
"source": [
117-
"# Verify: 80% of Third years have Declared\n",
118-
"32 / 40"
247+
"32 / (32 + 8)"
119248
]
120249
},
121250
{
122251
"cell_type": "code",
123-
"execution_count": null,
252+
"execution_count": 11,
124253
"metadata": {},
125-
"outputs": [],
254+
"outputs": [
255+
{
256+
"data": {
257+
"text/html": [
258+
"<table border=\"1\" class=\"dataframe\">\n",
259+
" <thead>\n",
260+
" <tr>\n",
261+
" <th>Year</th> <th>Declared</th> <th>Undeclared</th>\n",
262+
" </tr>\n",
263+
" </thead>\n",
264+
" <tbody>\n",
265+
" <tr>\n",
266+
" <td>Second</td> <td>30 </td> <td>30 </td>\n",
267+
" </tr>\n",
268+
" <tr>\n",
269+
" <td>Third </td> <td>32 </td> <td>8 </td>\n",
270+
" </tr>\n",
271+
" </tbody>\n",
272+
"</table>"
273+
],
274+
"text/plain": [
275+
"Year | Declared | Undeclared\n",
276+
"Second | 30 | 30\n",
277+
"Third | 32 | 8"
278+
]
279+
},
280+
"execution_count": 11,
281+
"metadata": {},
282+
"output_type": "execute_result"
283+
}
284+
],
285+
"source": [
286+
"students.pivot('Major', 'Year')"
287+
]
288+
},
289+
{
290+
"cell_type": "code",
291+
"execution_count": 12,
292+
"metadata": {},
293+
"outputs": [
294+
{
295+
"data": {
296+
"text/plain": [
297+
"0.4838709677419355"
298+
]
299+
},
300+
"execution_count": 12,
301+
"metadata": {},
302+
"output_type": "execute_result"
303+
}
304+
],
126305
"source": [
127306
"# Chance of second year, given that they have declared\n",
128307
"# P(second year | declared)\n",
129308
"\n",
130-
"30 / 62"
309+
"30 / (30 + 32)"
131310
]
132311
},
133312
{
134313
"cell_type": "code",
135-
"execution_count": null,
314+
"execution_count": 13,
136315
"metadata": {},
137-
"outputs": [],
316+
"outputs": [
317+
{
318+
"data": {
319+
"text/plain": [
320+
"0.5161290322580645"
321+
]
322+
},
323+
"execution_count": 13,
324+
"metadata": {},
325+
"output_type": "execute_result"
326+
}
327+
],
138328
"source": [
139329
"# P(third year | declared)\n",
140330
"\n",
141-
"32 / 62"
331+
"32 / (30 + 32)"
142332
]
143333
},
144334
{
@@ -186,9 +376,20 @@
186376
},
187377
{
188378
"cell_type": "code",
189-
"execution_count": null,
379+
"execution_count": 14,
190380
"metadata": {},
191-
"outputs": [],
381+
"outputs": [
382+
{
383+
"data": {
384+
"text/plain": [
385+
"0.689655172413793"
386+
]
387+
},
388+
"execution_count": 14,
389+
"metadata": {},
390+
"output_type": "execute_result"
391+
}
392+
],
192393
"source": [
193394
"# P(disease | tested +)\n",
194395
"\n",

0 commit comments

Comments
 (0)