Skip to content

Commit 8350521

Browse files
committed
lec 37
1 parent a918203 commit 8350521

1 file changed

Lines changed: 254 additions & 0 deletions

File tree

lec/lec37/lec37.ipynb

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from datascience import *\n",
10+
"import numpy as np\n",
11+
"import matplotlib\n",
12+
"from mpl_toolkits.mplot3d import Axes3D\n",
13+
"\n",
14+
"%matplotlib inline\n",
15+
"import matplotlib.pyplot as plots\n",
16+
"plots.style.use('fivethirtyeight')\n",
17+
"\n",
18+
"import warnings\n",
19+
"warnings.simplefilter(\"ignore\")"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": null,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"# np.array(list) converts list to an array\n",
29+
"# provided all the elements of list are of the same type\n",
30+
"\n",
31+
"n = 100\n",
32+
"second = round(n * 0.6)\n",
33+
"third = round(n * 0.4)\n",
34+
"\n",
35+
"year = np.array(['Second'] * second + ['Third'] * third)\n",
36+
"major = np.array(['Declared'] * (round(second * 0.5)) + ['Undeclared'] * (round(second * 0.5)) + \\\n",
37+
" ['Declared'] * (round(third * 0.8)) + ['Undeclared'] * (round(third * 0.2)))\n",
38+
" \n",
39+
"students = Table().with_columns(\n",
40+
" 'Year', year,\n",
41+
" 'Major', major\n",
42+
")"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"def create_population(prior_disease_prob, n):\n",
52+
" disease = round(n * prior_disease_prob)\n",
53+
" no_disease = round(n * (1 - prior_disease_prob))\n",
54+
"\n",
55+
" status = np.array(['Disease'] * disease + ['No disease'] * no_disease)\n",
56+
" result = np.array(['Test +'] * (disease) + ['Test +'] * (round(no_disease * 0.05)) + \\\n",
57+
" ['Test -'] * (round(no_disease * 0.95)))\n",
58+
" \n",
59+
" t = Table().with_columns(\n",
60+
" 'Status', status,\n",
61+
" 'Test Result', result\n",
62+
" )\n",
63+
" return t.pivot('Test Result', 'Status')"
64+
]
65+
},
66+
{
67+
"cell_type": "markdown",
68+
"metadata": {},
69+
"source": [
70+
"## More Likely Than Not ##"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"students.show(3)"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"students.pivot('Major', 'Year')"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"# Verify: 60% of students are Second years, 40% are Third years\n",
98+
"60 / (60 + 40)"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"metadata": {},
105+
"outputs": [],
106+
"source": [
107+
"# Verify: 50% of Second years have Declared\n",
108+
"30 / 60"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {},
115+
"outputs": [],
116+
"source": [
117+
"# Verify: 80% of Third years have Declared\n",
118+
"32 / 40"
119+
]
120+
},
121+
{
122+
"cell_type": "code",
123+
"execution_count": null,
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"# Chance of second year, given that they have declared\n",
128+
"# P(second year | declared)\n",
129+
"\n",
130+
"30 / 62"
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": null,
136+
"metadata": {},
137+
"outputs": [],
138+
"source": [
139+
"# P(third year | declared)\n",
140+
"\n",
141+
"32 / 62"
142+
]
143+
},
144+
{
145+
"cell_type": "markdown",
146+
"metadata": {},
147+
"source": [
148+
"## Tree Diagram Calculation"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": null,
154+
"metadata": {},
155+
"outputs": [],
156+
"source": [
157+
"# P(second year | declared), from tree diagram\n",
158+
"\n",
159+
"(0.6 * 0.5) / (0.6 * 0.5 + 0.4 * 0.8)"
160+
]
161+
},
162+
{
163+
"cell_type": "markdown",
164+
"metadata": {},
165+
"source": [
166+
"## Decisions ##"
167+
]
168+
},
169+
{
170+
"cell_type": "code",
171+
"execution_count": null,
172+
"metadata": {},
173+
"outputs": [],
174+
"source": [
175+
"create_population(1/1000, 10000)"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [],
183+
"source": [
184+
"10 / 510"
185+
]
186+
},
187+
{
188+
"cell_type": "code",
189+
"execution_count": null,
190+
"metadata": {},
191+
"outputs": [],
192+
"source": [
193+
"# P(disease | tested +)\n",
194+
"\n",
195+
"# = P(disease & tested +) / P(tested +)\n",
196+
"\n",
197+
"# if prior probability of disease is 1/10\n",
198+
"\n",
199+
"(0.1 * 1) / (0.1*1 + 0.9*0.05)"
200+
]
201+
},
202+
{
203+
"cell_type": "code",
204+
"execution_count": null,
205+
"metadata": {},
206+
"outputs": [],
207+
"source": [
208+
"create_population(1/10, 10000)"
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": null,
214+
"metadata": {},
215+
"outputs": [],
216+
"source": [
217+
"# P(disease | tested +)\n",
218+
"# if prior probability of disease is 0.5\n",
219+
"\n",
220+
"(0.5 * 1) / (0.5*1 + 0.5*0.05)"
221+
]
222+
},
223+
{
224+
"cell_type": "code",
225+
"execution_count": null,
226+
"metadata": {},
227+
"outputs": [],
228+
"source": [
229+
"create_population(1/2, 10000)"
230+
]
231+
}
232+
],
233+
"metadata": {
234+
"kernelspec": {
235+
"display_name": "Python 3 (ipykernel)",
236+
"language": "python",
237+
"name": "python3"
238+
},
239+
"language_info": {
240+
"codemirror_mode": {
241+
"name": "ipython",
242+
"version": 3
243+
},
244+
"file_extension": ".py",
245+
"mimetype": "text/x-python",
246+
"name": "python",
247+
"nbconvert_exporter": "python",
248+
"pygments_lexer": "ipython3",
249+
"version": "3.11.5"
250+
}
251+
},
252+
"nbformat": 4,
253+
"nbformat_minor": 4
254+
}

0 commit comments

Comments
 (0)