|
| 1 | +"""Тестирование к курсу.""" |
| 2 | + |
| 3 | +import os |
| 4 | + |
| 5 | +import numpy as np # linear algebra |
| 6 | +import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) |
| 7 | +from numpy.typing import NDArray # type: ignore |
| 8 | + |
| 9 | + |
class SimpleLinearRegression:
    """Ordinary least-squares regression of one target on one feature.

    After :meth:`fit` has run, ``coef`` holds the slope, ``intercept`` the
    offset of the fitted line and ``r_squared`` the squared Pearson
    correlation between the training inputs and targets.
    """

    # Class-level defaults describe an unfitted model; fit() overwrites them
    # on the instance.
    coef: float = 0.0
    intercept: float = 0.0
    r_squared: float = 0.0

    def fit(
        self,
        x_train_arg: NDArray[np.generic],
        y_train_arg: NDArray[np.generic],
    ) -> None:
        """Estimate the slope, intercept and R^2 from training data.

        Args:
            x_train_arg: Feature values. Any array-like is accepted; it is
                flattened, so shapes ``(n,)`` and ``(n, 1)`` both work.
            y_train_arg: Target values with the same number of samples as
                ``x_train_arg``; flattened the same way.

        Raises:
            ValueError: If the inputs are empty, contain a different number
                of samples, or if the feature has zero variance (the slope
                is undefined in that case).
        """
        # Flatten so column vectors are accepted, and work in float64 so that
        # integer inputs cannot overflow when multiplied.
        x_values = np.asarray(x_train_arg, dtype=np.float64).ravel()
        y_values = np.asarray(y_train_arg, dtype=np.float64).ravel()

        n_samples = x_values.size
        if n_samples == 0:
            raise ValueError("Cannot fit a regression on empty training data.")
        if y_values.size != n_samples:
            raise ValueError(
                "x_train_arg and y_train_arg must contain the same number of "
                f"samples, got {n_samples} and {y_values.size}."
            )

        # Centred sums are algebraically the same as the raw-sum formulas but
        # avoid subtracting two large, nearly equal numbers.
        x_mean = x_values.mean()
        y_mean = y_values.mean()
        x_centered = x_values - x_mean
        y_centered = y_values - y_mean

        s_xx = np.dot(x_centered, x_centered)
        s_yy = np.dot(y_centered, y_centered)
        s_xy = np.dot(x_centered, y_centered)

        if x_values.min() == x_values.max() or s_xx == 0:
            raise ValueError(
                "x_train_arg has zero variance; the slope is undefined."
            )

        slope = s_xy / s_xx
        self.coef = float(slope)
        self.intercept = float(y_mean - slope * x_mean)
        # The correlation is undefined for constant targets; report NaN
        # explicitly instead of dividing by zero.
        if s_yy > 0:
            self.r_squared = float(s_xy * s_xy / (s_xx * s_yy))
        else:
            self.r_squared = float("nan")

    def predict(self, x_test: NDArray[np.generic]) -> object:
        """Return ``x_test * coef + intercept`` using the fitted line."""
        return x_test * self.coef + self.intercept
| 49 | + |
| 50 | + |
# Locate the data file next to this script so the working directory is irrelevant.
script_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(script_dir, "tvmarketing.csv")
data_set = pd.read_csv(file_path)

# Selecting with a list of columns yields a two-dimensional array such as
# [[230.1]
#  [ 44.5]
#  [ 17.2]
#  ...
#  [232.1]]
# A typical LinearRegression.fit expects that two-dimensional layout, but the
# custom SimpleLinearRegression.fit in this file expects one-dimensional
# arrays, so both columns are flattened right away.
x_train = np.array(data_set[["TV"]]).ravel()
y_train = np.array(data_set[["Sales"]]).ravel()

simple_linear_regression = SimpleLinearRegression()
simple_linear_regression.fit(x_train, y_train)

# Report slope, intercept and R^2, one value per line.
print(
    simple_linear_regression.coef,
    simple_linear_regression.intercept,
    simple_linear_regression.r_squared,
    sep="\n",
)
0 commit comments