Skip to content

Commit 5fb638d

Browse files
author
lck6055
committed
Updated linear regression and gradient descent files with complexity and test cases
1 parent df4a6b5 commit 5fb638d

2 files changed

Lines changed: 167 additions & 79 deletions

File tree

Lines changed: 103 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,134 @@
1+
"""
2+
Algorithm: Linear Regression using Gradient Descent (Batch & Stochastic)
3+
4+
Description:
5+
This script implements Linear Regression using both Batch Gradient Descent (BGD)
6+
and Stochastic Gradient Descent (SGD).
7+
8+
Linear Regression fits a straight line to data points (x, y) by minimizing
9+
the Mean Squared Error (MSE) between the predicted and actual values.
10+
11+
Gradient Descent iteratively adjusts parameters (b0 - intercept, b1 - slope)
12+
in the opposite direction of the gradient of the loss function.
13+
14+
Mathematical Formulation:
15+
y_pred = b0 + b1 * x
16+
Loss (MSE) = (1/n) * Σ(y - y_pred)²
17+
Gradients:
18+
∂L/∂b0 = -(2/n) * Σ(y - y_pred)
19+
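∂L/∂b1 = -(2/n) * Σ(x * (y - y_pred))
Note: the implementation below omits the constant factor 2 (it uses 1/n);
this only rescales the effective learning rate.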
20+
21+
Variants:
22+
- Batch Gradient Descent: Uses all samples per iteration (stable but slower)
23+
- Stochastic Gradient Descent: Uses one random sample per iteration (faster but noisier)
24+
25+
Time Complexity:
26+
- Batch Gradient Descent: O(n * epochs)
27+
- Stochastic Gradient Descent: O(epochs)
28+
Space Complexity: O(1) for the parameters; the vectorized batch update also allocates O(n) temporary arrays per iteration
29+
"""
30+
131
import numpy as np
232
import matplotlib.pyplot as plt
333

4-
# Linear Regression using Gradient Descent (Full-Batch and SGD)
534

6-
# Dataset
7-
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
8-
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
9-
10-
# Gradient Descent Function
1135
def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False):
    """
    Fit y ≈ b0 + b1 * x with (batch or stochastic) gradient descent.

    The gradients used here are those of the mean squared error with the
    constant factor 2 dropped, i.e. the batch step uses -(1/n) * Σ(residual)
    and -(1/n) * Σ(residual * x); the stochastic step uses the same
    expressions for one randomly drawn sample.

    Parameters:
        x (array-like): Feature values (lists, tuples or NumPy arrays).
        y (array-like): Target values, same number of elements as x.
        lr (float): Learning rate.
        epochs (int): Maximum number of parameter updates.
        tolerance (float): Iteration stops once both parameters change by
            less than this amount in a single update.
        stochastic (bool): If True, each update uses one random sample
            (drawn with np.random.randint); otherwise the full dataset.

    Returns:
        tuple: (b0, b1) - Intercept and Slope

    Raises:
        ValueError: If x and y are empty or differ in length.

    Note:
        In stochastic mode the convergence test is applied to a single-sample
        update, so it can trigger whenever the drawn sample happens to have a
        very small residual, not only when the overall fit has settled.
    """
    # Coerce to float arrays so plain Python sequences work as well as arrays.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one sample")
    if y.size != n:
        raise ValueError("x and y must have the same number of elements")

    b0, b1 = 0.0, 0.0

    for epoch in range(epochs):
        if stochastic:
            # Stochastic Gradient Descent (single random sample)
            i = np.random.randint(0, n)
            residual = y[i] - (b0 + b1 * x[i])
            b0_grad = -residual
            b1_grad = -residual * x[i]
        else:
            # Batch Gradient Descent (uses full dataset)
            residual = y - (b0 + b1 * x)
            b0_grad = -np.sum(residual) / n
            b1_grad = -np.sum(residual * x) / n

        # Parameter updates
        b0_new = b0 - lr * b0_grad
        b1_new = b1 - lr * b1_grad

        # Convergence check: stop before committing a negligible step
        if abs(b0_new - b0) < tolerance and abs(b1_new - b1) < tolerance:
            print(f"✅ Converged after {epoch} epochs")
            break

        b0, b1 = b0_new, b1_new

    return b0, b1
4079

4180

42-
# Full-Batch Gradient Descent
43-
b0_gd, b1_gd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=False)
44-
y_pred_gd = b0_gd + b1_gd * x
45-
46-
# Stochastic Gradient Descent
47-
b0_sgd, b1_sgd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=True)
48-
y_pred_sgd = b0_sgd + b1_sgd * x
49-
50-
# Compute metrics
51-
SST = np.sum((y - np.mean(y))**2)
52-
SSE_gd = np.sum((y - y_pred_gd)**2)
53-
SSE_sgd = np.sum((y - y_pred_sgd)**2)
54-
R2_gd = 1 - (SSE_gd / SST)
55-
R2_sgd = 1 - (SSE_sgd / SST)
56-
57-
# Print results
58-
print("=== Gradient Descent (Full-Batch) ===")
59-
print(f"Intercept (b0): {b0_gd:.4f}, Slope (b1): {b1_gd:.4f}")
60-
print(f"SSE: {SSE_gd:.4f}, R²: {R2_gd:.4f}")
61-
62-
print("\n=== Gradient Descent (Stochastic) ===")
63-
print(f"Intercept (b0): {b0_sgd:.4f}, Slope (b1): {b1_sgd:.4f}")
64-
print(f"SSE: {SSE_sgd:.4f}, R²: {R2_sgd:.4f}")
65-
66-
# Plot comparison
67-
plt.scatter(x, y, color="blue", label="Data")
68-
plt.plot(x, y_pred_gd, "g--", label="Batch Gradient Descent")
69-
plt.plot(x, y_pred_sgd, "m:", label="Stochastic GD")
70-
plt.xlabel("x")
71-
plt.ylabel("y")
72-
plt.legend()
73-
plt.title("Linear Regression using Gradient Descent (Batch & Stochastic)")
74-
plt.show()
81+
# Example Test Cases
82+
if __name__ == "__main__":
    # Demo driver: fit each synthetic dataset with both gradient-descent
    # variants, report intercept/slope/R², and plot the two fitted lines.
    grid = np.arange(0, 10)
    test_cases = {
        "Case 1: Simple Linear": (
            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
        ),
        "Case 2: Perfectly Linear": (grid, 3 * grid + 2),  # y = 3x + 2
        "Case 3: Negative Slope": (grid, 20 - 2 * grid),  # y = -2x + 20
        "Case 4: Random Noise Added": (
            grid,
            5 * grid + np.random.normal(0, 3, 10),  # y = 5x + noise
        ),
    }

    for label, (xs, ys) in test_cases.items():
        print(f"\n=== {label} ===")

        # Total sum of squares is shared by both R² computations.
        total_ss = np.sum((ys - np.mean(ys)) ** 2)

        # Batch first, then stochastic, so console output keeps its order.
        outcomes = []
        for use_sgd in (False, True):
            intercept, slope = gradient_descent(
                xs, ys, lr=0.01, epochs=10000, stochastic=use_sgd
            )
            fitted = intercept + slope * xs
            residual_ss = np.sum((ys - fitted) ** 2)
            outcomes.append((intercept, slope, fitted, 1 - (residual_ss / total_ss)))

        (b0_bgd, b1_bgd, line_bgd, r2_bgd), (b0_sgd, b1_sgd, line_sgd, r2_sgd) = outcomes

        # Print results
        print(f"Batch GD: Intercept={b0_bgd:.4f}, Slope={b1_bgd:.4f}, R²={r2_bgd:.4f}")
        print(f"SGD: Intercept={b0_sgd:.4f}, Slope={b1_sgd:.4f}, R²={r2_sgd:.4f}")

        # Visualization
        plt.figure(figsize=(6, 4))
        plt.scatter(xs, ys, color="blue", label="Data Points")
        plt.plot(xs, line_bgd, "g--", label="Batch GD")
        plt.plot(xs, line_sgd, "m:", label="Stochastic GD")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.legend()
        plt.title(f"{label}")
        plt.show()
Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,67 @@
1+
"""
2+
Algorithm: Linear Regression (Analytical / Closed-form Solution)
3+
4+
Description:
5+
This script implements Linear Regression using the closed-form solution (Analytical method).
6+
It calculates the best-fit line for a dataset by directly computing the slope (b1) and intercept (b0)
7+
using the formulas derived from minimizing the Mean Squared Error (MSE).
8+
9+
Mathematical Formulation:
10+
y_pred = b0 + b1 * x
11+
b1 = Σ((x - mean(x)) * (y - mean(y))) / Σ((x - mean(x))^2)
12+
b0 = mean(y) - b1 * mean(x)
13+
14+
Time Complexity: O(n)   # A constant number of passes over the data (means, sums, predictions)
15+
Space Complexity: O(n)  # The predictions array (y_pred) and NumPy temporaries are length n; the coefficients themselves are O(1)
16+
"""
17+
118
import numpy as np
219
import matplotlib.pyplot as plt
320

4-
# --- Linear Regression using Analytical (Closed-form) Solution ---
5-
6-
# Dataset
7-
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
8-
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
9-
10-
# Mean values
11-
x_mean, y_mean = np.mean(x), np.mean(y)
12-
13-
# Compute coefficients (Closed-form)
14-
b1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean)**2)
15-
b0 = y_mean - b1 * x_mean
16-
17-
# Predictions
18-
y_pred = b0 + b1 * x
19-
20-
# Compute SSE and R²
21-
SSE = np.sum((y - y_pred)**2)
22-
SST = np.sum((y - y_mean)**2)
23-
R2 = 1 - (SSE / SST)
24-
25-
# Print results
26-
print("=== Linear Regression (Analytical Solution) ===")
27-
print(f"Intercept (b0): {b0:.4f}")
28-
print(f"Slope (b1): {b1:.4f}")
29-
print(f"SSE: {SSE:.4f}")
30-
print(f"R²: {R2:.4f}")
31-
32-
# Plot results
33-
plt.scatter(x, y, color="blue", label="Data")
34-
plt.plot(x, y_pred, "r-", label="Analytical Solution")
35-
plt.xlabel("x")
36-
plt.ylabel("y")
37-
plt.legend()
38-
plt.title("Linear Regression - Analytical Solution")
39-
plt.show()
21+
# Function for Analytical Linear Regression
22+
def linear_regression_analytical(x, y):
    """
    Fit y ≈ b0 + b1 * x by least squares using the closed-form solution.

    Parameters:
        x (array-like): Feature values.
        y (array-like): Target values, same shape as x.

    Returns:
        tuple: (b0, b1, y_pred, SSE, R2) where b0 is the intercept, b1 the
        slope, y_pred the fitted values for x, SSE the sum of squared
        residuals and R2 the coefficient of determination.

    Raises:
        ValueError: If the inputs are empty, differ in shape, or x has no
            variation (the slope is then undefined).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size == 0:
        raise ValueError("x and y must contain at least one sample")
    if x.shape != y.shape:
        raise ValueError("x and y must have the same shape")

    x_mean, y_mean = np.mean(x), np.mean(y)
    x_centered = x - x_mean

    # Guard the slope denominator: identical x values make b1 a 0/0 division.
    sxx = np.sum(x_centered ** 2)
    if sxx == 0:
        raise ValueError("x must contain at least two distinct values")

    b1 = np.sum(x_centered * (y - y_mean)) / sxx
    b0 = y_mean - b1 * x_mean
    y_pred = b0 + b1 * x

    SSE = np.sum((y - y_pred) ** 2)
    SST = np.sum((y - y_mean) ** 2)
    # A constant target gives SST == 0; the fit then reproduces y exactly
    # (b1 == 0, b0 == mean(y)), so report R² = 1 instead of 0/0 = NaN.
    R2 = 1 - (SSE / SST) if SST != 0 else 1.0
    return b0, b1, y_pred, SSE, R2
31+
32+
# Test Cases
33+
# Demo datasets: each entry maps a label to its feature/target arrays.
_grid = np.arange(0, 10)
test_cases = {
    "Simple Linear": {
        "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
    },
    # y = 3x + 2
    "Perfectly Linear": {"x": _grid, "y": 3 * _grid + 2},
    # y = -2x + 20
    "Negative Slope": {"x": _grid, "y": 20 - 2 * _grid},
    # y = 5x + noise
    "Noisy Data": {"x": _grid, "y": 5 * _grid + np.random.normal(0, 3, 10)},
}

# Fit every dataset with the closed-form solver, report the fit, and plot it.
for name, data in test_cases.items():
    x = data["x"]
    y = data["y"]
    print(f"\n=== {name} ===")

    fit = linear_regression_analytical(x, y)
    b0, b1, y_pred, SSE, R2 = fit
    print(f"Intercept (b0): {b0:.4f}, Slope (b1): {b1:.4f}")
    print(f"SSE: {SSE:.4f}, R²: {R2:.4f}")

    # Plot
    plt.figure(figsize=(6, 4))
    plt.scatter(x, y, color="blue", label="Data Points")
    plt.plot(x, y_pred, "r-", label="Analytical Solution")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend()
    plt.title(f"Linear Regression - {name}")
    plt.show()

0 commit comments

Comments
 (0)