forked from Pradeepsingh61/DSA_Code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinear_regression.py
More file actions
92 lines (73 loc) · 2.85 KB
/
Copy pathlinear_regression.py
File metadata and controls
92 lines (73 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Algorithm: Linear Regression (Analytical / Closed-form Solution)
Description:
This script implements Linear Regression using the closed-form solution.
It calculates the best-fit line by directly computing the slope (b1) and intercept (b0)
using formulas derived from minimizing the Mean Squared Error (MSE).
Time Complexity: O(n) # A constant number of vectorized passes over the data
Space Complexity: O(n) # The prediction array (and NumPy temporaries) have one entry per data point
"""
import numpy as np
import matplotlib.pyplot as plt
# --- Function for Analytical Linear Regression ---
def linear_regression_analytical(x, y):
    """
    Computes simple linear regression coefficients using the analytical
    (closed-form least-squares) method.

    Parameters:
        x (array-like): Feature values
        y (array-like): Target values, same shape as x

    Returns:
        tuple: b0 (intercept), b1 (slope), y_pred (predictions), SSE, R²

    Raises:
        ValueError: If x and y have different shapes, are empty, or if all
            x values are identical (the slope is undefined in that case).
    """
    # Accept lists/tuples as well as arrays, and work in floating point
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Reject inputs that would otherwise broadcast silently or yield NaN
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    if x.size == 0:
        raise ValueError("x and y must not be empty")
    # Range check instead of comparing the sum of squares to zero: rounding
    # in the mean can leave a tiny non-zero variance for constant float data
    if np.ptp(x) == 0:
        raise ValueError(
            "x must contain at least two distinct values to fit a slope"
        )

    # Compute mean of x and y, and the deviations reused below
    x_mean, y_mean = np.mean(x), np.mean(y)
    x_dev = x - x_mean
    y_dev = y - y_mean

    # Compute slope (b1) = S_xy / S_xx
    b1 = np.sum(x_dev * y_dev) / np.sum(x_dev**2)

    # Compute intercept (b0) so the line passes through (x_mean, y_mean)
    b0 = y_mean - b1 * x_mean

    # Compute predicted y values
    y_pred = b0 + b1 * x

    # Compute Sum of Squared Errors (SSE)
    SSE = np.sum((y - y_pred)**2)

    # Compute R² score (coefficient of determination)
    if np.ptp(y) == 0:
        # Constant target: SST is zero, so 1 - SSE/SST would be 0/0.
        # The fitted line is the constant itself, so report a perfect fit.
        R2 = 1.0
    else:
        # Total Sum of Squares (SST) around the mean of y
        SST = np.sum(y_dev**2)
        R2 = 1 - (SSE / SST)

    return b0, b1, y_pred, SSE, R2
# --- Test Cases ---
# Demo datasets keyed by a human-readable name; each entry holds the
# feature array "x" and the target array "y" of equal length.
test_cases = {
    # Hand-written points with a roughly linear upward trend
    "Simple Linear": {
        "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
    },
    "Perfectly Linear": {
        "x": np.arange(0, 10),
        "y": 3 * np.arange(0, 10) + 2,  # y = 3x + 2
    },
    "Negative Slope": {
        "x": np.arange(0, 10),
        "y": 20 - 2 * np.arange(0, 10),  # y = -2x + 20
    },
    "Noisy Data": {
        "x": np.arange(0, 10),
        # Seeded generator so the noise (and therefore the reported
        # coefficients, SSE and R²) is identical on every run
        "y": 5 * np.arange(0, 10)
        + np.random.default_rng(0).normal(0, 3, 10),  # y = 5x + noise
    },
}
# Loop through each test case
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, so importing
    # it for linear_regression_analytical() does not print results or open
    # blocking plot windows.
    for name, data in test_cases.items():
        print(f"\n=== {name} ===")
        x, y = data["x"], data["y"]

        # Compute linear regression using analytical method
        b0, b1, y_pred, SSE, R2 = linear_regression_analytical(x, y)

        # Print coefficients and metrics
        print(f"Intercept (b0): {b0:.4f}, Slope (b1): {b1:.4f}")
        print(f"SSE: {SSE:.4f}, R²: {R2:.4f}")

        # Plot data points and fitted line (one figure per test case)
        plt.figure(figsize=(6, 4))
        plt.scatter(x, y, color="blue", label="Data Points")  # Original data points
        plt.plot(x, y_pred, "r-", label="Analytical Solution")  # Fitted line
        plt.xlabel("x")  # x-axis label
        plt.ylabel("y")  # y-axis label
        plt.legend()
        plt.title(f"Linear Regression - {name}")  # Plot title
        plt.show()