Skip to content

Commit b4af693

Browse files
Merge pull request #400 from lck6055/machine_learning
Added new Directory Ml and implemented basic algos
2 parents 3443cf2 + b3fcd31 commit b4af693

2 files changed

Lines changed: 226 additions & 0 deletions

File tree

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""
2+
Algorithm: Linear Regression using Gradient Descent (Batch & Stochastic)
3+
4+
Description:
5+
This script implements Linear Regression using both Batch Gradient Descent (BGD)
6+
and Stochastic Gradient Descent (SGD).
7+
8+
Linear Regression fits a straight line to data points (x, y) by minimizing
9+
the Mean Squared Error (MSE) between the predicted and actual values.
10+
11+
Gradient Descent iteratively adjusts parameters (b0 - intercept, b1 - slope)
12+
in the opposite direction of the gradient of the loss function.
13+
14+
Mathematical Formulation:
15+
y_pred = b0 + b1 * x
16+
Loss (MSE) = (1/n) * Σ(y - y_pred)²
17+
Gradients:
18+
∂L/∂b0 = -(2/n) * Σ(y - y_pred)
19+
    ∂L/∂b1 = -(2/n) * Σ(x * (y - y_pred))
    (The implementation omits the constant factor 2; this only rescales the learning rate.)
20+
21+
Variants:
22+
- Batch Gradient Descent: Uses all samples per iteration (stable but slower)
23+
- Stochastic Gradient Descent: Uses one random sample per iteration (faster but noisier)
24+
25+
Time Complexity:
26+
- Batch Gradient Descent: O(n * epochs)
27+
- Stochastic Gradient Descent: O(epochs)
28+
Space Complexity: O(1) extra for SGD; O(n) temporaries per update for the vectorized batch variant
29+
"""
30+
31+
import numpy as np
32+
import matplotlib.pyplot as plt
33+
34+
35+
def _batch_gradients(x, y, b0, b1):
    """Return (dL/db0, dL/db1) of the squared error averaged over all samples.

    The constant factor 2 of the MSE derivative is omitted; it only rescales
    the learning rate.
    """
    residual = y - (b0 + b1 * x)
    return -np.sum(residual) / x.size, -np.sum(residual * x) / x.size


def gradient_descent(x, y, lr=0.01, epochs=10000, tolerance=1e-6, stochastic=False):
    """
    Fits y ≈ b0 + b1 * x using Batch or Stochastic Gradient Descent.

    Parameters:
        x (array-like): Feature values (flattened to 1-D floats)
        y (array-like): Target values, same number of elements as x
        lr (float): Learning rate
        epochs (int): Maximum number of parameter updates
        tolerance (float): Convergence threshold on lr * gradient for both
            parameters
        stochastic (bool): If True, each update uses one randomly drawn
            sample (SGD); otherwise each update uses the whole dataset

    Returns:
        tuple: (b0, b1) - Intercept and Slope

    Raises:
        ValueError: If x is empty or x and y differ in number of elements

    Notes:
        In batch mode the convergence test looks at the step just taken.
        In stochastic mode a single-sample step can be zero even when the
        fit is poor (the sample merely lies on the current line), so the
        test is run on the full-data gradient once every n updates instead.
    """
    # Coerce so that plain lists/tuples work; `0 * list` would otherwise
    # produce an empty list instead of an element-wise product.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one sample")
    if y.size != n:
        raise ValueError("x and y must have the same number of samples")

    b0, b1 = 0.0, 0.0

    for epoch in range(epochs):
        if stochastic:
            # Stochastic Gradient Descent (single random sample)
            i = np.random.randint(0, n)
            residual = y[i] - (b0 + b1 * x[i])
            b0_grad, b1_grad = -residual, -residual * x[i]
        else:
            # Batch Gradient Descent (uses full dataset)
            b0_grad, b1_grad = _batch_gradients(x, y, b0, b1)

        # Parameter updates
        b0 -= lr * b0_grad
        b1 -= lr * b1_grad

        if stochastic:
            # Only test once per n updates so the O(n) check is amortised,
            # and base it on the full gradient rather than one sample.
            if (epoch + 1) % n != 0:
                continue
            b0_grad, b1_grad = _batch_gradients(x, y, b0, b1)

        # Convergence check
        if abs(lr * b0_grad) < tolerance and abs(lr * b1_grad) < tolerance:
            print(f"✅ Converged after {epoch} epochs")
            break

    return b0, b1
79+
80+
81+
# Example Test Cases
82+
if __name__ == "__main__":
    # Demo datasets: one hand-written sample plus three lines built on 0..9.
    grid = np.arange(10)
    test_cases = {
        "Case 1: Simple Linear": {
            "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
        },
        # y = 3x + 2
        "Case 2: Perfectly Linear": {"x": grid, "y": 3 * grid + 2},
        # y = -2x + 20
        "Case 3: Negative Slope": {"x": grid, "y": 20 - 2 * grid},
        # y = 5x + Gaussian noise (sigma = 3)
        "Case 4: Random Noise Added": {
            "x": grid,
            "y": 5 * grid + np.random.normal(loc=0, scale=3, size=10),
        },
    }

    for name, data in test_cases.items():
        print(f"\n=== {name} ===")
        x = data["x"]
        y = data["y"]

        # Fit with the batch variant first, then with the stochastic one.
        b0_gd, b1_gd = gradient_descent(
            x, y, lr=0.01, epochs=10000, stochastic=False
        )
        line_gd = b0_gd + b1_gd * x

        b0_sgd, b1_sgd = gradient_descent(
            x, y, lr=0.01, epochs=10000, stochastic=True
        )
        line_sgd = b0_sgd + b1_sgd * x

        # Coefficient of determination for each fit.
        total_ss = np.sum((y - np.mean(y)) ** 2)
        R2_gd = 1 - np.sum((y - line_gd) ** 2) / total_ss
        R2_sgd = 1 - np.sum((y - line_sgd) ** 2) / total_ss

        # Report the fitted parameters.
        print(f"Batch GD: Intercept={b0_gd:.4f}, Slope={b1_gd:.4f}, R²={R2_gd:.4f}")
        print(f"SGD: Intercept={b0_sgd:.4f}, Slope={b1_sgd:.4f}, R²={R2_sgd:.4f}")

        # Scatter the data and overlay both fitted lines.
        plt.figure(figsize=(6, 4))
        plt.scatter(x, y, color="blue", label="Data Points")
        for fitted, fmt, label in (
            (line_gd, "g--", "Batch GD"),
            (line_sgd, "m:", "Stochastic GD"),
        ):
            plt.plot(x, fitted, fmt, label=label)
        plt.xlabel("x")
        plt.ylabel("y")
        plt.legend()
        plt.title(f"{name}")
        plt.show()
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""
2+
Algorithm: Linear Regression (Analytical / Closed-form Solution)
3+
4+
Description:
5+
This script implements Linear Regression using the closed-form solution.
6+
It calculates the best-fit line by directly computing the slope (b1) and intercept (b0)
7+
using formulas derived from minimizing the Mean Squared Error (MSE).
8+
9+
Time Complexity: O(n)  # A constant number of passes over the data
10+
Space Complexity: O(n)  # y_pred and the centered arrays hold one value per sample
11+
"""
12+
13+
import numpy as np
14+
import matplotlib.pyplot as plt
15+
16+
# --- Function for Analytical Linear Regression ---
17+
def linear_regression_analytical(x, y):
    """
    Computes simple linear regression coefficients with the closed-form
    least-squares formulas.

    Parameters:
        x (array-like): Feature values
        y (array-like): Target values, same shape as x

    Returns:
        tuple: b0 (intercept), b1 (slope), y_pred (predictions), SSE, R²

    Raises:
        ValueError: If the inputs are empty, differ in shape, or x has no
            variation (the slope is undefined in that case)
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size == 0:
        raise ValueError("x and y must contain at least one sample")
    if x.shape != y.shape:
        raise ValueError("x and y must have the same shape")

    # Center both variables around their means
    x_mean, y_mean = np.mean(x), np.mean(y)
    x_centered = x - x_mean
    y_centered = y - y_mean

    # Slope b1 = Sxy / Sxx; Sxx == 0 would be a 0/0 division
    sxx = np.sum(x_centered ** 2)
    if sxx == 0:
        raise ValueError("x must contain at least two distinct values")
    b1 = np.sum(x_centered * y_centered) / sxx

    # Intercept makes the line pass through (x_mean, y_mean)
    b0 = y_mean - b1 * x_mean

    # Compute predicted y values
    y_pred = b0 + b1 * x

    # Sum of Squared Errors (SSE) and Total Sum of Squares (SST)
    SSE = np.sum((y - y_pred) ** 2)
    SST = np.sum(y_centered ** 2)

    # R² = 1 - SSE/SST. For constant y (SST == 0) the ratio is undefined;
    # report 1.0 for an exact fit and 0.0 otherwise instead of nan.
    if SST == 0:
        R2 = 1.0 if SSE == 0 else 0.0
    else:
        R2 = 1 - (SSE / SST)

    return b0, b1, y_pred, SSE, R2
50+
51+
52+
# --- Test Cases ---
53+
test_cases = {
    "Simple Linear": {
        "x": np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        "y": np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12]),
    },
    # y = 3x + 2
    "Perfectly Linear": {
        "x": np.arange(10),
        "y": 3 * np.arange(10) + 2,
    },
    # y = -2x + 20
    "Negative Slope": {
        "x": np.arange(10),
        "y": 20 - 2 * np.arange(10),
    },
    # y = 5x + Gaussian noise (sigma = 3)
    "Noisy Data": {
        "x": np.arange(10),
        "y": 5 * np.arange(10) + np.random.normal(loc=0, scale=3, size=10),
    },
}

# Fit, report and plot every dataset in turn.
for name, data in test_cases.items():
    print(f"\n=== {name} ===")
    x = data["x"]
    y = data["y"]

    # Closed-form fit: coefficients, predictions and goodness-of-fit metrics.
    fit = linear_regression_analytical(x, y)
    b0, b1, y_pred, SSE, R2 = fit

    print(f"Intercept (b0): {b0:.4f}, Slope (b1): {b1:.4f}")
    print(f"SSE: {SSE:.4f}, R²: {R2:.4f}")

    # Raw observations as dots, fitted line in red on top.
    plt.figure(figsize=(6, 4))
    plt.scatter(x, y, color="blue", label="Data Points")
    plt.plot(x, y_pred, "r-", label="Analytical Solution")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend()
    plt.title(f"Linear Regression - {name}")
    plt.show()

0 commit comments

Comments
 (0)