|
| 1 | +""" |
| 2 | +Algorithm: Linear Regression using Gradient Descent (Batch & Stochastic) |
| 3 | +
|
| 4 | +Description: |
| 5 | +This script implements Linear Regression using both Batch Gradient Descent (BGD) |
| 6 | +and Stochastic Gradient Descent (SGD). |
| 7 | +
|
| 8 | +Linear Regression fits a straight line to data points (x, y) by minimizing |
| 9 | +the Mean Squared Error (MSE) between the predicted and actual values. |
| 10 | +
|
| 11 | +Gradient Descent iteratively adjusts parameters (b0 - intercept, b1 - slope) |
| 12 | +in the opposite direction of the gradient of the loss function. |
| 13 | +
|
| 14 | +Mathematical Formulation: |
| 15 | + y_pred = b0 + b1 * x |
| 16 | + Loss (MSE) = (1/n) * Σ(y - y_pred)² |
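| 17 | + Gradients (the code drops the constant factor 2, absorbing it into the learning rate; the SGD branch uses a single sample, so it has no 1/n): |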
| 18 | + ∂L/∂b0 = -(2/n) * Σ(y - y_pred) |
| 19 | + ∂L/∂b1 = -(2/n) * Σ(x * (y - y_pred)) |
| 20 | +
|
| 21 | +Variants: |
| 22 | +- Batch Gradient Descent: Uses all samples per iteration (stable but slower) |
| 23 | +- Stochastic Gradient Descent: Uses one random sample per iteration (faster but noisier) |
| 24 | +
|
| 25 | +Time Complexity: |
| 26 | + - Batch Gradient Descent: O(n * epochs) |
| 27 | + - Stochastic Gradient Descent: O(epochs) |
| 28 | +Space Complexity: O(1) extra for SGD; Batch Gradient Descent allocates O(n) temporary arrays per iteration |
| 29 | +""" |
| 30 | + |
1 | 31 | import numpy as np |
2 | 32 | import matplotlib.pyplot as plt |
3 | 33 |
|
4 | | -# Linear Regression using Gradient Descent (Full-Batch and SGD) |
5 | 34 |
|
6 | | -# Dataset |
7 | | -x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) |
8 | | -y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) |
9 | | - |
10 | | -# Gradient Descent Function |
def _batch_gradients(x, y, b0, b1):
    """Return the full-dataset gradients (d/db0, d/db1) of the half-MSE loss."""
    residual = y - (b0 + b1 * x)
    n = x.size
    return -np.sum(residual) / n, -np.sum(residual * x) / n


def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False):
    """
    Fit the line ``y = b0 + b1 * x`` using batch or stochastic gradient descent.

    The gradients used are those of the half mean squared error, i.e. the
    usual MSE gradients without the constant factor 2 (equivalent to halving
    the learning rate). In stochastic mode each iteration uses the gradient
    of one sample drawn uniformly at random via ``np.random.randint``, so
    results depend on NumPy's global random state.

    Parameters:
        x (array-like): Feature values; flattened to a 1-D float array.
        y (array-like): Target values; must contain as many values as ``x``.
        lr (float): Learning rate.
        epochs (int): Maximum number of parameter updates.
        tolerance (float): Stop once both parameters would change by less
            than this amount in a single update.
        stochastic (bool): If True, performs Stochastic Gradient Descent.

    Returns:
        tuple: (b0, b1) - Intercept and Slope

    Raises:
        ValueError: If the inputs are empty or differ in size.
    """
    # Accept lists/tuples as well as arrays: with plain lists the expression
    # ``b1 * x`` would be list repetition rather than element-wise math.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one sample")
    if y.size != n:
        raise ValueError(
            f"x and y must have the same number of samples (got {n} and {y.size})"
        )

    b0, b1 = 0.0, 0.0

    for epoch in range(epochs):
        if stochastic:
            # Stochastic Gradient Descent (single random sample)
            i = np.random.randint(0, n)
            residual = y[i] - (b0 + b1 * x[i])
            b0_grad = -residual
            b1_grad = -residual * x[i]
        else:
            # Batch Gradient Descent (uses full dataset)
            b0_grad, b1_grad = _batch_gradients(x, y, b0, b1)

        # Parameter updates
        b0_new = b0 - lr * b0_grad
        b1_new = b1 - lr * b1_grad

        # Convergence check
        converged = abs(b0_new - b0) < tolerance and abs(b1_new - b1) < tolerance
        if converged and stochastic:
            # A single sample that happens to lie on the current line yields a
            # near-zero step even when the overall fit is poor. Confirm with
            # the full-batch gradient before stopping; this O(n) check only
            # runs when the cheap single-sample test has already passed.
            full_b0_grad, full_b1_grad = _batch_gradients(x, y, b0, b1)
            converged = (
                abs(lr * full_b0_grad) < tolerance
                and abs(lr * full_b1_grad) < tolerance
            )
        if converged:
            print(f"✅ Converged after {epoch} epochs")
            break

        b0, b1 = b0_new, b1_new

    return b0, b1
40 | 79 |
|
41 | 80 |
|
42 | | -# Full-Batch Gradient Descent |
43 | | -b0_gd, b1_gd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=False) |
44 | | -y_pred_gd = b0_gd + b1_gd * x |
45 | | - |
46 | | -# Stochastic Gradient Descent |
47 | | -b0_sgd, b1_sgd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=True) |
48 | | -y_pred_sgd = b0_sgd + b1_sgd * x |
49 | | - |
50 | | -# Compute metrics |
51 | | -SST = np.sum((y - np.mean(y))**2) |
52 | | -SSE_gd = np.sum((y - y_pred_gd)**2) |
53 | | -SSE_sgd = np.sum((y - y_pred_sgd)**2) |
54 | | -R2_gd = 1 - (SSE_gd / SST) |
55 | | -R2_sgd = 1 - (SSE_sgd / SST) |
56 | | - |
57 | | -# Print results |
58 | | -print("=== Gradient Descent (Full-Batch) ===") |
59 | | -print(f"Intercept (b0): {b0_gd:.4f}, Slope (b1): {b1_gd:.4f}") |
60 | | -print(f"SSE: {SSE_gd:.4f}, R²: {R2_gd:.4f}") |
61 | | - |
62 | | -print("\n=== Gradient Descent (Stochastic) ===") |
63 | | -print(f"Intercept (b0): {b0_sgd:.4f}, Slope (b1): {b1_sgd:.4f}") |
64 | | -print(f"SSE: {SSE_sgd:.4f}, R²: {R2_sgd:.4f}") |
65 | | - |
66 | | -# Plot comparison |
67 | | -plt.scatter(x, y, color="blue", label="Data") |
68 | | -plt.plot(x, y_pred_gd, "g--", label="Batch Gradient Descent") |
69 | | -plt.plot(x, y_pred_sgd, "m:", label="Stochastic GD") |
70 | | -plt.xlabel("x") |
71 | | -plt.ylabel("y") |
72 | | -plt.legend() |
73 | | -plt.title("Linear Regression using Gradient Descent (Batch & Stochastic)") |
74 | | -plt.show() |
# Demo: fit a handful of small datasets with both variants and compare them.
if __name__ == "__main__":
    grid = np.arange(0, 10)

    # (title, features, targets) for every scenario. The noisy targets are
    # drawn here, once, before any fitting starts.
    scenarios = [
        (
            "Case 1: Simple Linear",
            np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
        ),
        ("Case 2: Perfectly Linear", grid, 3 * grid + 2),  # y = 3x + 2
        ("Case 3: Negative Slope", grid, 20 - 2 * grid),  # y = -2x + 20
        (
            "Case 4: Random Noise Added",
            grid,
            5 * grid + np.random.normal(0, 3, 10),  # y = 5x + noise
        ),
    ]

    for label, xs, ys in scenarios:
        print(f"\n=== {label} ===")

        # Fit with the full-batch variant first, then the stochastic one.
        icpt_batch, slope_batch = gradient_descent(
            xs, ys, lr=0.01, epochs=10000, stochastic=False
        )
        line_batch = icpt_batch + slope_batch * xs

        icpt_sgd, slope_sgd = gradient_descent(
            xs, ys, lr=0.01, epochs=10000, stochastic=True
        )
        line_sgd = icpt_sgd + slope_sgd * xs

        # R² = 1 - SSE / SST for each fitted line.
        total_ss = np.sum((ys - np.mean(ys)) ** 2)
        r2_batch = 1 - (np.sum((ys - line_batch) ** 2) / total_ss)
        r2_sgd = 1 - (np.sum((ys - line_sgd) ** 2) / total_ss)

        print(f"Batch GD: Intercept={icpt_batch:.4f}, Slope={slope_batch:.4f}, R²={r2_batch:.4f}")
        print(f"SGD: Intercept={icpt_sgd:.4f}, Slope={slope_sgd:.4f}, R²={r2_sgd:.4f}")

        # One figure per scenario: raw points plus both fitted lines.
        plt.figure(figsize=(6, 4))
        plt.title(label)
        plt.scatter(xs, ys, color="blue", label="Data Points")
        plt.plot(xs, line_batch, "g--", label="Batch GD")
        plt.plot(xs, line_sgd, "m:", label="Stochastic GD")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.legend()
        plt.show()
0 commit comments