|
| 1 | +""" |
| 2 | +Algorithm: Linear Regression using Gradient Descent (Batch & Stochastic) |
| 3 | +
|
| 4 | +Description: |
| 5 | +This script implements Linear Regression using both Batch Gradient Descent (BGD) |
| 6 | +and Stochastic Gradient Descent (SGD). |
| 7 | +
|
| 8 | +Linear Regression fits a straight line to data points (x, y) by minimizing |
| 9 | +the Mean Squared Error (MSE) between the predicted and actual values. |
| 10 | +
|
| 11 | +Gradient Descent iteratively adjusts parameters (b0 - intercept, b1 - slope) |
| 12 | +in the opposite direction of the gradient of the loss function. |
| 13 | +
|
| 14 | +Mathematical Formulation: |
| 15 | + y_pred = b0 + b1 * x |
| 16 | + Loss (MSE) = (1/n) * Σ(y - y_pred)² |
| 17 | + Gradients: |
| 18 | + ∂L/∂b0 = -(2/n) * Σ(y - y_pred) |
| 19 | + ∂L/∂b1 = -(2/n) * Σ(x * (y - y_pred)) |
| 20 | +
|
| 21 | +Variants: |
| 22 | +- Batch Gradient Descent: Uses all samples per iteration (stable but slower) |
| 23 | +- Stochastic Gradient Descent: Uses one random sample per iteration (faster but noisier) |
| 24 | +
|
| 25 | +Time Complexity: |
| 26 | + - Batch Gradient Descent: O(n * epochs) |
| 27 | + - Stochastic Gradient Descent: O(epochs) |
| 28 | +Space Complexity: O(1) for SGD; O(n) for Batch GD (each epoch builds a length-n prediction array) |
| 29 | +""" |
| 30 | + |
| 31 | +import numpy as np |
| 32 | +import matplotlib.pyplot as plt |
| 33 | + |
| 34 | + |
def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False):
    """
    Fit the line y = b0 + b1 * x with batch or stochastic gradient descent.

    The batch step uses the mean of the per-sample gradients of
    0.5 * (y - y_pred)**2, i.e. half the MSE gradient; the constant factor
    of 2 is absorbed into the learning rate. The stochastic step uses the
    same per-sample gradient for one index drawn with np.random.randint,
    so results depend on NumPy's global random state.

    Parameters:
        x (array-like): Feature values (flattened to 1-D, converted to float)
        y (array-like): Target values (flattened to 1-D, converted to float)
        lr (float): Learning rate
        epochs (int): Maximum number of parameter updates
        tolerance (float): Stop once both parameters change by less than
            this amount in a single update
        stochastic (bool): If True, each update uses one random sample
            instead of the full dataset

    Returns:
        tuple: (b0, b1) - Intercept and Slope

    Raises:
        ValueError: If x is empty or x and y differ in length.

    Side effects:
        Prints a message when the convergence threshold is reached.

    Note:
        With stochastic=True the convergence test looks at a single noisy
        step, so it can trigger whenever the sampled point happens to be
        fitted well; pass tolerance=0 to always run all epochs.
    """
    # Accept lists/tuples as well as arrays; float dtype keeps the arithmetic
    # identical to what integer arrays were promoted to anyway.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one sample")
    if y.size != n:
        raise ValueError(
            f"x and y must have the same length (got {n} and {y.size})"
        )

    b0, b1 = 0.0, 0.0

    for epoch in range(epochs):
        if stochastic:
            # Stochastic Gradient Descent (single random sample)
            i = np.random.randint(0, n)
            xi = x[i]
            residual = y[i] - (b0 + b1 * xi)
            b0_grad = -residual
            b1_grad = -residual * xi
        else:
            # Batch Gradient Descent (uses full dataset)
            residual = y - (b0 + b1 * x)
            b0_grad = -np.sum(residual) / n
            b1_grad = -np.sum(residual * x) / n

        # Parameter updates
        b0_new = b0 - lr * b0_grad
        b1_new = b1 - lr * b1_grad

        # Convergence is judged on the size of this step, but the step is
        # committed first so the most recent iterate is what gets returned.
        converged = abs(b0_new - b0) < tolerance and abs(b1_new - b1) < tolerance
        b0, b1 = b0_new, b1_new
        if converged:
            print(f"✅ Converged after {epoch} epochs")
            break

    return b0, b1
| 79 | + |
| 80 | + |
# Demo: fit four small datasets with both optimizers, report R², and plot
if __name__ == "__main__":
    # One shared 0..9 grid; it is only read, never modified.
    grid = np.arange(0, 10)

    # The noisy targets are drawn here, before any SGD sampling happens.
    test_cases = {
        "Case 1: Simple Linear": {
            "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
        },
        # y = 3x + 2
        "Case 2: Perfectly Linear": {"x": grid, "y": 3 * grid + 2},
        # y = -2x + 20
        "Case 3: Negative Slope": {"x": grid, "y": 20 - 2 * grid},
        # y = 5x + noise
        "Case 4: Random Noise Added": {
            "x": grid,
            "y": 5 * grid + np.random.normal(0, 3, 10),
        },
    }

    for name, data in test_cases.items():
        print(f"\n=== {name} ===")
        x = data["x"]
        y = data["y"]

        # Fit with the full-batch optimizer first, then the stochastic one
        batch_b0, batch_b1 = gradient_descent(
            x, y, lr=0.01, epochs=10000, stochastic=False
        )
        batch_line = batch_b0 + batch_b1 * x

        sgd_b0, sgd_b1 = gradient_descent(
            x, y, lr=0.01, epochs=10000, stochastic=True
        )
        sgd_line = sgd_b0 + sgd_b1 * x

        # Coefficient of determination: 1 - residual SS / total SS
        total_ss = np.sum((y - np.mean(y)) ** 2)
        batch_r2 = 1 - (np.sum((y - batch_line) ** 2) / total_ss)
        sgd_r2 = 1 - (np.sum((y - sgd_line) ** 2) / total_ss)

        print(f"Batch GD: Intercept={batch_b0:.4f}, Slope={batch_b1:.4f}, R²={batch_r2:.4f}")
        print(f"SGD: Intercept={sgd_b0:.4f}, Slope={sgd_b1:.4f}, R²={sgd_r2:.4f}")

        # One figure per case: raw points plus both fitted lines
        plt.figure(figsize=(6, 4))
        plt.scatter(x, y, color="blue", label="Data Points")
        for line, fmt, label in (
            (batch_line, "g--", "Batch GD"),
            (sgd_line, "m:", "Stochastic GD"),
        ):
            plt.plot(x, line, fmt, label=label)
        plt.xlabel("x")
        plt.ylabel("y")
        plt.legend()
        plt.title(f"{name}")
        plt.show()
0 commit comments