From df4a6b5249eb808ddf8caec73b0a51ed4e86c2c7 Mon Sep 17 00:00:00 2001 From: lck6055 Date: Sat, 4 Oct 2025 15:07:09 +0530 Subject: [PATCH 1/3] Added new Directory Ml and implemented basic algos --- Python/machine_learning/gradient_descent.py | 74 ++++++++++++++++++++ Python/machine_learning/linear_regression.py | 39 +++++++++++ 2 files changed, 113 insertions(+) create mode 100644 Python/machine_learning/gradient_descent.py create mode 100644 Python/machine_learning/linear_regression.py diff --git a/Python/machine_learning/gradient_descent.py b/Python/machine_learning/gradient_descent.py new file mode 100644 index 00000000..65c61102 --- /dev/null +++ b/Python/machine_learning/gradient_descent.py @@ -0,0 +1,74 @@ +import numpy as np +import matplotlib.pyplot as plt + +# Linear Regression using Gradient Descent (Full-Batch and SGD) + +# Dataset +x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) + +# Gradient Descent Function +def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False): + b0, b1 = 0, 0 + n = len(x) + + for epoch in range(epochs): + if stochastic: + # Stochastic (single sample) + i = np.random.randint(0, n) + xi, yi = x[i], y[i] + y_pred = b0 + b1 * xi + b0_grad = -(yi - y_pred) + b1_grad = -(yi - y_pred) * xi + else: + # Batch Gradient Descent + y_pred = b0 + b1 * x + b0_grad = -np.sum(y - y_pred) / n + b1_grad = -np.sum((y - y_pred) * x) / n + + # Update parameters + b0_new = b0 - lr * b0_grad + b1_new = b1 - lr * b1_grad + + # Check convergence + if abs(b0_new - b0) < tolerance and abs(b1_new - b1) < tolerance: + break + + b0, b1 = b0_new, b1_new + + return b0, b1 + + +# Full-Batch Gradient Descent +b0_gd, b1_gd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=False) +y_pred_gd = b0_gd + b1_gd * x + +# Stochastic Gradient Descent +b0_sgd, b1_sgd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=True) +y_pred_sgd = b0_sgd + b1_sgd * x + +# Compute metrics +SST = 
np.sum((y - np.mean(y))**2) +SSE_gd = np.sum((y - y_pred_gd)**2) +SSE_sgd = np.sum((y - y_pred_sgd)**2) +R2_gd = 1 - (SSE_gd / SST) +R2_sgd = 1 - (SSE_sgd / SST) + +# Print results +print("=== Gradient Descent (Full-Batch) ===") +print(f"Intercept (b0): {b0_gd:.4f}, Slope (b1): {b1_gd:.4f}") +print(f"SSE: {SSE_gd:.4f}, R²: {R2_gd:.4f}") + +print("\n=== Gradient Descent (Stochastic) ===") +print(f"Intercept (b0): {b0_sgd:.4f}, Slope (b1): {b1_sgd:.4f}") +print(f"SSE: {SSE_sgd:.4f}, R²: {R2_sgd:.4f}") + +# Plot comparison +plt.scatter(x, y, color="blue", label="Data") +plt.plot(x, y_pred_gd, "g--", label="Batch Gradient Descent") +plt.plot(x, y_pred_sgd, "m:", label="Stochastic GD") +plt.xlabel("x") +plt.ylabel("y") +plt.legend() +plt.title("Linear Regression using Gradient Descent (Batch & Stochastic)") +plt.show() diff --git a/Python/machine_learning/linear_regression.py b/Python/machine_learning/linear_regression.py new file mode 100644 index 00000000..ff9aa1f0 --- /dev/null +++ b/Python/machine_learning/linear_regression.py @@ -0,0 +1,39 @@ +import numpy as np +import matplotlib.pyplot as plt + +# --- Linear Regression using Analytical (Closed-form) Solution --- + +# Dataset +x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) + +# Mean values +x_mean, y_mean = np.mean(x), np.mean(y) + +# Compute coefficients (Closed-form) +b1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean)**2) +b0 = y_mean - b1 * x_mean + +# Predictions +y_pred = b0 + b1 * x + +# Compute SSE and R² +SSE = np.sum((y - y_pred)**2) +SST = np.sum((y - y_mean)**2) +R2 = 1 - (SSE / SST) + +# Print results +print("=== Linear Regression (Analytical Solution) ===") +print(f"Intercept (b0): {b0:.4f}") +print(f"Slope (b1): {b1:.4f}") +print(f"SSE: {SSE:.4f}") +print(f"R²: {R2:.4f}") + +# Plot results +plt.scatter(x, y, color="blue", label="Data") +plt.plot(x, y_pred, "r-", label="Analytical Solution") +plt.xlabel("x") +plt.ylabel("y") +plt.legend() 
+plt.title("Linear Regression - Analytical Solution") +plt.show() From 5fb638dfd869ceb04b8c04a3f7d1f37d5991fb0d Mon Sep 17 00:00:00 2001 From: lck6055 Date: Sat, 4 Oct 2025 15:17:02 +0530 Subject: [PATCH 2/3] Updated linear regression and gradient descent files with complexity and test cases --- Python/machine_learning/gradient_descent.py | 146 +++++++++++++------ Python/machine_learning/linear_regression.py | 100 ++++++++----- 2 files changed, 167 insertions(+), 79 deletions(-) diff --git a/Python/machine_learning/gradient_descent.py b/Python/machine_learning/gradient_descent.py index 65c61102..1fa7a14b 100644 --- a/Python/machine_learning/gradient_descent.py +++ b/Python/machine_learning/gradient_descent.py @@ -1,37 +1,76 @@ +""" +Algorithm: Linear Regression using Gradient Descent (Batch & Stochastic) + +Description: +This script implements Linear Regression using both Batch Gradient Descent (BGD) +and Stochastic Gradient Descent (SGD). + +Linear Regression fits a straight line to data points (x, y) by minimizing +the Mean Squared Error (MSE) between the predicted and actual values. + +Gradient Descent iteratively adjusts parameters (b0 - intercept, b1 - slope) +in the opposite direction of the gradient of the loss function. 
+ +Mathematical Formulation: + y_pred = b0 + b1 * x + Loss (MSE) = (1/n) * Σ(y - y_pred)² + Gradients (the code drops the constant factor 2; it is absorbed into lr): + ∂L/∂b0 = -(2/n) * Σ(y - y_pred) + ∂L/∂b1 = -(2/n) * Σ(x * (y - y_pred)) + +Variants: +- Batch Gradient Descent: Uses all samples per iteration (stable but slower) +- Stochastic Gradient Descent: Uses one random sample per iteration (faster but noisier) + +Time Complexity: + - Batch Gradient Descent: O(n * epochs) + - Stochastic Gradient Descent: O(epochs) +Space Complexity: O(1) for SGD; O(n) for Batch (temporary prediction array) +""" + import numpy as np import matplotlib.pyplot as plt -# Linear Regression using Gradient Descent (Full-Batch and SGD) -# Dataset -x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) - -# Gradient Descent Function def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False): + """ + Performs linear regression using Gradient Descent. + + Parameters: + x (np.ndarray): Feature values + y (np.ndarray): Target values + lr (float): Learning rate + epochs (int): Maximum iterations + tolerance (float): Stop once both parameter updates in a step are smaller than this + stochastic (bool): If True, performs Stochastic Gradient Descent + + Returns: + tuple: (b0, b1) - Intercept and Slope + """ b0, b1 = 0, 0 n = len(x) for epoch in range(epochs): if stochastic: - # Stochastic (single sample) + # Stochastic Gradient Descent (single random sample) i = np.random.randint(0, n) xi, yi = x[i], y[i] y_pred = b0 + b1 * xi b0_grad = -(yi - y_pred) b1_grad = -(yi - y_pred) * xi else: - # Batch Gradient Descent + # Batch Gradient Descent (uses full dataset) y_pred = b0 + b1 * x b0_grad = -np.sum(y - y_pred) / n b1_grad = -np.sum((y - y_pred) * x) / n - # Update parameters + # Parameter updates b0_new = b0 - lr * b0_grad b1_new = b1 - lr * b1_grad - # Check convergence + # Convergence check if abs(b0_new - b0) < tolerance and abs(b1_new - b1) < tolerance: + print(f"✅ Converged after {epoch} epochs") break b0, b1 = b0_new, b1_new @@ -39,36 +78,57 @@ def gradient_descent(x, y, lr=0.01, 
epochs=10000, tolerance=1e-6, stochastic=Fal return b0, b1 -# Full-Batch Gradient Descent -b0_gd, b1_gd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=False) -y_pred_gd = b0_gd + b1_gd * x - -# Stochastic Gradient Descent -b0_sgd, b1_sgd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=True) -y_pred_sgd = b0_sgd + b1_sgd * x - -# Compute metrics -SST = np.sum((y - np.mean(y))**2) -SSE_gd = np.sum((y - y_pred_gd)**2) -SSE_sgd = np.sum((y - y_pred_sgd)**2) -R2_gd = 1 - (SSE_gd / SST) -R2_sgd = 1 - (SSE_sgd / SST) - -# Print results -print("=== Gradient Descent (Full-Batch) ===") -print(f"Intercept (b0): {b0_gd:.4f}, Slope (b1): {b1_gd:.4f}") -print(f"SSE: {SSE_gd:.4f}, R²: {R2_gd:.4f}") - -print("\n=== Gradient Descent (Stochastic) ===") -print(f"Intercept (b0): {b0_sgd:.4f}, Slope (b1): {b1_sgd:.4f}") -print(f"SSE: {SSE_sgd:.4f}, R²: {R2_sgd:.4f}") - -# Plot comparison -plt.scatter(x, y, color="blue", label="Data") -plt.plot(x, y_pred_gd, "g--", label="Batch Gradient Descent") -plt.plot(x, y_pred_sgd, "m:", label="Stochastic GD") -plt.xlabel("x") -plt.ylabel("y") -plt.legend() -plt.title("Linear Regression using Gradient Descent (Batch & Stochastic)") -plt.show() +# Example Test Cases +if __name__ == "__main__": + test_cases = { + "Case 1: Simple Linear": { + "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) + }, + "Case 2: Perfectly Linear": { + "x": np.arange(0, 10), + "y": 3 * np.arange(0, 10) + 2 # y = 3x + 2 + }, + "Case 3: Negative Slope": { + "x": np.arange(0, 10), + "y": 20 - 2 * np.arange(0, 10) # y = -2x + 20 + }, + "Case 4: Random Noise Added": { + "x": np.arange(0, 10), + "y": 5 * np.arange(0, 10) + np.random.normal(0, 3, 10) # y = 5x + noise + } + } + + for name, data in test_cases.items(): + print(f"\n=== {name} ===") + x, y = data["x"], data["y"] + + # Batch Gradient Descent + b0_gd, b1_gd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=False) + y_pred_gd = b0_gd + b1_gd * 
x + + # Stochastic Gradient Descent + b0_sgd, b1_sgd = gradient_descent(x, y, lr=0.01, epochs=10000, stochastic=True) + y_pred_sgd = b0_sgd + b1_sgd * x + + # Metrics + SST = np.sum((y - np.mean(y))**2) + SSE_gd = np.sum((y - y_pred_gd)**2) + SSE_sgd = np.sum((y - y_pred_sgd)**2) + R2_gd = 1 - (SSE_gd / SST) + R2_sgd = 1 - (SSE_sgd / SST) + + # Print results + print(f"Batch GD: Intercept={b0_gd:.4f}, Slope={b1_gd:.4f}, R²={R2_gd:.4f}") + print(f"SGD: Intercept={b0_sgd:.4f}, Slope={b1_sgd:.4f}, R²={R2_sgd:.4f}") + + # Visualization + plt.figure(figsize=(6, 4)) + plt.scatter(x, y, color="blue", label="Data Points") + plt.plot(x, y_pred_gd, "g--", label="Batch GD") + plt.plot(x, y_pred_sgd, "m:", label="Stochastic GD") + plt.xlabel("x") + plt.ylabel("y") + plt.legend() + plt.title(f"{name}") + plt.show() diff --git a/Python/machine_learning/linear_regression.py b/Python/machine_learning/linear_regression.py index ff9aa1f0..32eb26ec 100644 --- a/Python/machine_learning/linear_regression.py +++ b/Python/machine_learning/linear_regression.py @@ -1,39 +1,67 @@ +""" +Algorithm: Linear Regression (Analytical / Closed-form Solution) + +Description: +This script implements Linear Regression using the closed-form solution (Analytical method). +It calculates the best-fit line for a dataset by directly computing the slope (b1) and intercept (b0) +using the formulas derived from minimizing the Mean Squared Error (MSE). 
+ +Mathematical Formulation: + y_pred = b0 + b1 * x + b1 = Σ((x - mean(x)) * (y - mean(y))) / Σ((x - mean(x))^2) + b0 = mean(y) - b1 * mean(x) + +Time Complexity: O(n) # Single pass through the data +Space Complexity: O(1) # Only a few variables stored +""" + import numpy as np import matplotlib.pyplot as plt -# --- Linear Regression using Analytical (Closed-form) Solution --- - -# Dataset -x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) - -# Mean values -x_mean, y_mean = np.mean(x), np.mean(y) - -# Compute coefficients (Closed-form) -b1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean)**2) -b0 = y_mean - b1 * x_mean - -# Predictions -y_pred = b0 + b1 * x - -# Compute SSE and R² -SSE = np.sum((y - y_pred)**2) -SST = np.sum((y - y_mean)**2) -R2 = 1 - (SSE / SST) - -# Print results -print("=== Linear Regression (Analytical Solution) ===") -print(f"Intercept (b0): {b0:.4f}") -print(f"Slope (b1): {b1:.4f}") -print(f"SSE: {SSE:.4f}") -print(f"R²: {R2:.4f}") - -# Plot results -plt.scatter(x, y, color="blue", label="Data") -plt.plot(x, y_pred, "r-", label="Analytical Solution") -plt.xlabel("x") -plt.ylabel("y") -plt.legend() -plt.title("Linear Regression - Analytical Solution") -plt.show() +# Function for Analytical Linear Regression +def linear_regression_analytical(x, y): + x_mean, y_mean = np.mean(x), np.mean(y) + b1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean)**2) + b0 = y_mean - b1 * x_mean + y_pred = b0 + b1 * x + SSE = np.sum((y - y_pred)**2) + SST = np.sum((y - y_mean)**2) + R2 = 1 - (SSE / SST) + return b0, b1, y_pred, SSE, R2 + +# Test Cases +test_cases = { + "Simple Linear": { + "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]) + }, + "Perfectly Linear": { + "x": np.arange(0, 10), + "y": 3 * np.arange(0, 10) + 2 # y = 3x + 2 + }, + "Negative Slope": { + "x": np.arange(0, 10), + "y": 20 - 2 * np.arange(0, 10) # y = -2x + 20 + }, + "Noisy Data": { 
+ "x": np.arange(0, 10), + "y": 5 * np.arange(0, 10) + np.random.normal(0, 3, 10) # y = 5x + noise + } +} + +for name, data in test_cases.items(): + print(f"\n=== {name} ===") + x, y = data["x"], data["y"] + b0, b1, y_pred, SSE, R2 = linear_regression_analytical(x, y) + print(f"Intercept (b0): {b0:.4f}, Slope (b1): {b1:.4f}") + print(f"SSE: {SSE:.4f}, R²: {R2:.4f}") + + # Plot + plt.figure(figsize=(6, 4)) + plt.scatter(x, y, color="blue", label="Data Points") + plt.plot(x, y_pred, "r-", label="Analytical Solution") + plt.xlabel("x") + plt.ylabel("y") + plt.legend() + plt.title(f"Linear Regression - {name}") + plt.show() From b3fcd31fef33eb52307ff73aad59412b45442090 Mon Sep 17 00:00:00 2001 From: lck6055 Date: Sat, 4 Oct 2025 15:54:01 +0530 Subject: [PATCH 3/3] updated linear_regression --- Python/machine_learning/linear_regression.py | 59 ++++++++++++++------ 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/Python/machine_learning/linear_regression.py b/Python/machine_learning/linear_regression.py index 32eb26ec..dcd7c8d1 100644 --- a/Python/machine_learning/linear_regression.py +++ b/Python/machine_learning/linear_regression.py @@ -2,34 +2,54 @@ Algorithm: Linear Regression (Analytical / Closed-form Solution) Description: -This script implements Linear Regression using the closed-form solution (Analytical method). -It calculates the best-fit line for a dataset by directly computing the slope (b1) and intercept (b0) -using the formulas derived from minimizing the Mean Squared Error (MSE). +This script implements Linear Regression using the closed-form solution. +It calculates the best-fit line by directly computing the slope (b1) and intercept (b0) +using formulas derived from minimizing the Mean Squared Error (MSE). 
-Mathematical Formulation: - y_pred = b0 + b1 * x - b1 = Σ((x - mean(x)) * (y - mean(y))) / Σ((x - mean(x))^2) - b0 = mean(y) - b1 * mean(x) - -Time Complexity: O(n) # Single pass through the data +Time Complexity: O(n) # A fixed number of vectorized passes over the data Space Complexity: O(1) # Only a few variables stored """ import numpy as np import matplotlib.pyplot as plt -# Function for Analytical Linear Regression +# --- Function for Analytical Linear Regression --- def linear_regression_analytical(x, y): + """ + Computes linear regression coefficients using the analytical method. + + Parameters: + x (np.ndarray): Feature values + y (np.ndarray): Target values + + Returns: + tuple: b0 (intercept), b1 (slope), y_pred (predictions), SSE, R² + """ + # Compute mean of x and y x_mean, y_mean = np.mean(x), np.mean(y) + + # Slope (b1): co-deviation of x and y divided by the squared deviation of x b1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean)**2) + + # Intercept (b0): chosen so the line passes through (x_mean, y_mean) b0 = y_mean - b1 * x_mean + + # Compute predicted y values y_pred = b0 + b1 * x + + # Compute Sum of Squared Errors (SSE) SSE = np.sum((y - y_pred)**2) + + # Compute Total Sum of Squares (SST) for R² SST = np.sum((y - y_mean)**2) + + # R² (coefficient of determination); undefined when y is constant (SST == 0) R2 = 1 - (SSE / SST) + return b0, b1, y_pred, SSE, R2 -# Test Cases + +# --- Test Cases --- test_cases = { "Simple Linear": { "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), @@ -49,19 +69,24 @@ def linear_regression_analytical(x, y): } } +# Loop through each test case for name, data in test_cases.items(): print(f"\n=== {name} ===") x, y = data["x"], data["y"] + + # Compute linear regression using analytical method b0, b1, y_pred, SSE, R2 = linear_regression_analytical(x, y) + + # Print coefficients and metrics print(f"Intercept (b0): {b0:.4f}, Slope (b1): {b1:.4f}") print(f"SSE: {SSE:.4f}, R²: {R2:.4f}") - # Plot + # Plot data points and fitted line plt.figure(figsize=(6, 4)) - plt.scatter(x, y, color="blue", label="Data Points") - plt.plot(x, y_pred, 
"r-", label="Analytical Solution") - plt.xlabel("x") - plt.ylabel("y") + plt.scatter(x, y, color="blue", label="Data Points") # Original data points + plt.plot(x, y_pred, "r-", label="Analytical Solution") # Fitted line + plt.xlabel("x") # x-axis label + plt.ylabel("y") # y-axis label plt.legend() - plt.title(f"Linear Regression - {name}") + plt.title(f"Linear Regression - {name}") # Plot title plt.show()