Skip to content

Commit 747b79d

Browse files
committed
OLS LinearRegression course
1 parent fae895a commit 747b79d

6 files changed

Lines changed: 568 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Простая линейная регрессия на Python. Математика машинного обучения
2+
3+
## Stepik
4+
5+
Ссылка на курс [https://stepik.org/course/239757](https://stepik.org/course/239757)
6+
7+
Реферальная ссылка на курс https://stepik.org/a/239757
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""Тестирование к курсу."""
2+
3+
import numpy as np
4+
from sklearn.linear_model import LinearRegression
5+
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
6+
7+
# Toy data set: a single feature column (scikit-learn expects a 2-D X)
# together with the matching target values.
x_array = np.arange(1, 7).reshape(-1, 1)
y_array = np.array([10, 12, 15, 20, 22, 24])

# ``fit`` hands back the estimator itself, so creation and training chain.
model = LinearRegression().fit(x_array, y_array)
y_pred = model.predict(x_array)

# An R2 score close to 1 means the data follow a strong linear relationship.
r2 = r2_score(y_array, y_pred)

print(mean_squared_error(y_array, y_pred))
print(mean_absolute_error(y_array, y_pred))
print(model.coef_[0])  # slope
print(model.intercept_)  # offset: the value of y at x = 0


# The same error metrics worked out by hand on a three-point example.
y_true = np.array([10, 10.5, 11])
y_pred = np.array([10.2, 10.4, 11.1])
n_samples = len(y_true)

errors = y_true - y_pred
print(errors)
abs_errors = np.absolute(errors)
print(abs_errors)
squared_errors = np.square(errors)

# Builtin ``sum`` is kept so the additions happen in the same order as before.
print(sum(abs_errors) / n_samples)  # Mean Absolute Error (MAE)
print(sum(squared_errors) / n_samples)  # Mean Squared Error (MSE)
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""Тестирование к курсу."""
2+
3+
import os
4+
5+
import numpy as np # linear algebra
6+
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
7+
from numpy.typing import NDArray # type: ignore
8+
9+
10+
class SimpleLinearRegression:
    """Ordinary least squares fit of the line ``y = coef * x + intercept``.

    The attributes below are class-level defaults (all ``0``) until ``fit``
    stores the fitted values on the instance:

    * ``coef`` - slope of the fitted line;
    * ``intercept`` - value of the fitted line at ``x = 0``;
    * ``r_squared`` - squared Pearson correlation between x and y, which for
      a one-feature least squares fit equals the coefficient of determination.
    """

    coef: float = 0
    intercept: float = 0
    r_squared: float = 0

    @staticmethod
    def _as_vector(
        values: NDArray[np.generic],
        name: str,
    ) -> NDArray[np.float64]:
        """Return ``values`` as a flat float64 vector.

        Converting to float64 up front keeps integer inputs from overflowing
        in the sums of squares and products computed by ``fit``. A single
        column of shape ``(n, 1)`` is accepted and flattened.

        Raises:
            ValueError: if ``values`` is neither 1-D nor a single column.
        """
        vector = np.asarray(values, dtype=np.float64)
        if vector.ndim == 2 and vector.shape[1] == 1:
            vector = vector[:, 0]
        if vector.ndim != 1:
            raise ValueError(
                f"{name} must be 1-D or a single column, got shape {vector.shape}"
            )
        return vector

    def fit(
        self,
        x_train_arg: NDArray[np.generic],
        y_train_arg: NDArray[np.generic],
    ) -> None:
        """Compute the regression coefficients.

        Stores ``coef``, ``intercept`` and ``r_squared`` on the instance.

        Args:
            x_train_arg: feature values, 1-D or a single ``(n, 1)`` column.
            y_train_arg: target values, 1-D or a single ``(n, 1)`` column.

        Raises:
            ValueError: if the inputs are empty, differ in length, have an
                unsupported shape, or if all x values are identical (the
                slope is undefined in that case).
        """
        x_values = self._as_vector(x_train_arg, "x_train_arg")
        y_values = self._as_vector(y_train_arg, "y_train_arg")

        if x_values.size != y_values.size:
            raise ValueError(
                "x_train_arg and y_train_arg must have the same length, "
                f"got {x_values.size} and {y_values.size}"
            )
        if x_values.size == 0:
            raise ValueError("cannot fit a regression on empty data")
        if x_values.min() == x_values.max():
            raise ValueError("x_train_arg is constant; the slope is undefined")

        x_mean = x_values.mean()
        y_mean = y_values.mean()

        # Work with centred data: mathematically the same as the raw-sums
        # formulas, but without subtracting two huge, nearly equal numbers.
        x_centered = x_values - x_mean
        y_centered = y_values - y_mean

        ss_xx = float(np.dot(x_centered, x_centered))
        ss_yy = float(np.dot(y_centered, y_centered))
        ss_xy = float(np.dot(x_centered, y_centered))

        slope = ss_xy / ss_xx

        self.coef = slope
        self.intercept = float(y_mean - slope * x_mean)
        if y_values.min() == y_values.max():
            # No variance in y: the correlation (and so r^2) is undefined.
            self.r_squared = float("nan")
        else:
            self.r_squared = (ss_xy * ss_xy) / (ss_xx * ss_yy)

    def predict(self, x_test: NDArray[np.generic]) -> NDArray[np.float64]:
        """Predict targets for ``x_test`` using the stored line.

        Args:
            x_test: feature values; any array-like of numbers is accepted.

        Returns:
            ``x_test * coef + intercept`` computed element-wise in float64.
        """
        return np.asarray(x_test, dtype=np.float64) * self.coef + self.intercept
49+
50+
51+
# The data set is expected to sit next to this script.
script_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(script_dir, "tvmarketing.csv")
data_set = pd.read_csv(file_path)

# Selecting with a list of columns (``data_set[["TV"]]``) would produce a
# two-dimensional array such as
# [[230.1]
#  [ 44.5]
#  [ 17.2]
#  ...
#  [232.1]]
# which is the layout scikit-learn's LinearRegression.fit usually expects.
# The custom SimpleLinearRegression below is fed flat one-dimensional
# arrays instead, so each column is taken directly as a 1-D array.
x_train = data_set["TV"].to_numpy()
y_train = data_set["Sales"].to_numpy()

simple_linear_regression = SimpleLinearRegression()
simple_linear_regression.fit(x_train, y_train)

# Report slope, intercept and R^2, one value per line.
for fitted_value in (
    simple_linear_regression.coef,
    simple_linear_regression.intercept,
    simple_linear_regression.r_squared,
):
    print(fitted_value)
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
TV,Sales
2+
230.1,22.1
3+
44.5,10.4
4+
17.2,9.3
5+
151.5,18.5
6+
180.8,12.9
7+
8.7,7.2
8+
57.5,11.8
9+
120.2,13.2
10+
8.6,4.8
11+
199.8,10.6
12+
66.1,8.6
13+
214.7,17.4
14+
23.8,9.2
15+
97.5,9.7
16+
204.1,19
17+
195.4,22.4
18+
67.8,12.5
19+
281.4,24.4
20+
69.2,11.3
21+
147.3,14.6
22+
218.4,18
23+
237.4,12.5
24+
13.2,5.6
25+
228.3,15.5
26+
62.3,9.7
27+
262.9,12
28+
142.9,15
29+
240.1,15.9
30+
248.8,18.9
31+
70.6,10.5
32+
292.9,21.4
33+
112.9,11.9
34+
97.2,9.6
35+
265.6,17.4
36+
95.7,9.5
37+
290.7,12.8
38+
266.9,25.4
39+
74.7,14.7
40+
43.1,10.1
41+
228,21.5
42+
202.5,16.6
43+
177,17.1
44+
293.6,20.7
45+
206.9,12.9
46+
25.1,8.5
47+
175.1,14.9
48+
89.7,10.6
49+
239.9,23.2
50+
227.2,14.8
51+
66.9,9.7
52+
199.8,11.4
53+
100.4,10.7
54+
216.4,22.6
55+
182.6,21.2
56+
262.7,20.2
57+
198.9,23.7
58+
7.3,5.5
59+
136.2,13.2
60+
210.8,23.8
61+
210.7,18.4
62+
53.5,8.1
63+
261.3,24.2
64+
239.3,15.7
65+
102.7,14
66+
131.1,18
67+
69,9.3
68+
31.5,9.5
69+
139.3,13.4
70+
237.4,18.9
71+
216.8,22.3
72+
199.1,18.3
73+
109.8,12.4
74+
26.8,8.8
75+
129.4,11
76+
213.4,17
77+
16.9,8.7
78+
27.5,6.9
79+
120.5,14.2
80+
5.4,5.3
81+
116,11
82+
76.4,11.8
83+
239.8,12.3
84+
75.3,11.3
85+
68.4,13.6
86+
213.5,21.7
87+
193.2,15.2
88+
76.3,12
89+
110.7,16
90+
88.3,12.9
91+
109.8,16.7
92+
134.3,11.2
93+
28.6,7.3
94+
217.7,19.4
95+
250.9,22.2
96+
107.4,11.5
97+
163.3,16.9
98+
197.6,11.7
99+
184.9,15.5
100+
289.7,25.4
101+
135.2,17.2
102+
222.4,11.7
103+
296.4,23.8
104+
280.2,14.8
105+
187.9,14.7
106+
238.2,20.7
107+
137.9,19.2
108+
25,7.2
109+
90.4,8.7
110+
13.1,5.3
111+
255.4,19.8
112+
225.8,13.4
113+
241.7,21.8
114+
175.7,14.1
115+
209.6,15.9
116+
78.2,14.6
117+
75.1,12.6
118+
139.2,12.2
119+
76.4,9.4
120+
125.7,15.9
121+
19.4,6.6
122+
141.3,15.5
123+
18.8,7
124+
224,11.6
125+
123.1,15.2
126+
229.5,19.7
127+
87.2,10.6
128+
7.8,6.6
129+
80.2,8.8
130+
220.3,24.7
131+
59.6,9.7
132+
0.7,1.6
133+
265.2,12.7
134+
8.4,5.7
135+
219.8,19.6
136+
36.9,10.8
137+
48.3,11.6
138+
25.6,9.5
139+
273.7,20.8
140+
43,9.6
141+
184.9,20.7
142+
73.4,10.9
143+
193.7,19.2
144+
220.5,20.1
145+
104.6,10.4
146+
96.2,11.4
147+
140.3,10.3
148+
240.1,13.2
149+
243.2,25.4
150+
38,10.9
151+
44.7,10.1
152+
280.7,16.1
153+
121,11.6
154+
197.6,16.6
155+
171.3,19
156+
187.8,15.6
157+
4.1,3.2
158+
93.9,15.3
159+
149.8,10.1
160+
11.7,7.3
161+
131.7,12.9
162+
172.5,14.4
163+
85.7,13.3
164+
188.4,14.9
165+
163.5,18
166+
117.2,11.9
167+
234.5,11.9
168+
17.9,8
169+
206.8,12.2
170+
215.4,17.1
171+
284.3,15
172+
50,8.4
173+
164.5,14.5
174+
19.6,7.6
175+
168.4,11.7
176+
222.4,11.5
177+
276.9,27
178+
248.4,20.2
179+
170.2,11.7
180+
276.7,11.8
181+
165.6,12.6
182+
156.6,10.5
183+
218.5,12.2
184+
56.2,8.7
185+
287.6,26.2
186+
253.8,17.6
187+
205,22.6
188+
139.5,10.3
189+
191.1,17.3
190+
286,15.9
191+
18.7,6.7
192+
39.5,10.8
193+
75.5,9.9
194+
17.2,5.9
195+
166.8,19.6
196+
149.7,17.3
197+
38.2,7.6
198+
94.2,9.7
199+
177,12.8
200+
283.6,25.5
201+
232.1,13.4

courses/Simple-regression-OLS-stepik/ols.ipynb

Lines changed: 149 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)