-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSoftmax.py
More file actions
121 lines (100 loc) · 3.55 KB
/
Softmax.py
File metadata and controls
121 lines (100 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import time
import numpy as np
class Softmax:
    """Multinomial logistic regression (softmax) classifier trained with
    batch gradient descent.

    Weights are stored column-wise: column j of ``W`` holds the weight
    vector for class j, so ``X @ W + b`` has shape (m, num_classes).
    """

    def __init__(self):
        self.X: np.ndarray = None      # training inputs, shape (m, n)
        self.y: np.ndarray = None      # integer class labels, shape (m,)
        self.y_hot: np.ndarray = None  # one-hot labels, shape (m, c)
        self.W: np.ndarray = None      # weights, shape (n, c)
        self.b: np.ndarray = None      # biases, shape (c,)
        # BUG FIX: original wrote `self.mu: None`, a bare annotation that
        # assigns nothing, so `self.mu` raised AttributeError if read
        # before normalize_data() ran. Initialize it like sigma.
        self.mu = None                 # normalization mean (set by normalize_data)
        self.sigma = None              # normalization std  (set by normalize_data)

    def set_weight(self, W):
        self.W: np.ndarray = W

    def set_bias(self, b):
        self.b: np.ndarray = b

    def fit(self, X, y, W=None, b=None):
        """Store training data and (optionally) initial parameters.

        BUG FIX: the original defined ``fit`` twice; the second definition
        silently shadowed the first, making the two-argument ``fit(X, y)``
        unreachable. Merged into one method with optional ``W``/``b`` —
        backward-compatible with both original signatures.

        Parameters
        ----------
        X : array, shape (m, n) — training inputs.
        y : array, shape (m,) — integer class labels.
        W : array, shape (n, c), optional — initial weights.
        b : array, shape (c,), optional — initial biases.
        """
        if W is not None:
            self.W: np.ndarray = W
        if b is not None:
            self.b: np.ndarray = b
        self.X: np.ndarray = X
        self.y: np.ndarray = y
        # One-hot encoding reads the class count from self.W, so W must be
        # set (here or via set_weight) before fit is called.
        self._one_hot_enc()

    def get_weights_bias(self):
        '''return -> W, b'''
        return self.W, self.b

    def _softmax(self, z):
        # Numerically stable softmax: subtracting each row's max keeps
        # exp() from overflowing without changing the result.
        z = z - np.max(z, axis=1).reshape((z.shape[0], 1))
        ez = np.exp(z)
        return ez / np.sum(ez, axis=1).reshape((-1, 1))

    def _one_hot_enc(self):
        """Build and cache the (m, c) one-hot matrix for self.y."""
        # Class count c comes from the weight matrix's column count.
        _, c = self.W.shape
        y_hot = np.zeros((len(self.y), c))
        y_hot[np.arange(len(self.y)), self.y] = 1
        self.y_hot = y_hot
        return y_hot

    def _get_cost(self):
        """Mean cross-entropy cost over the training set.

        Uses the log-sum-exp trick (log-softmax computed directly) so no
        explicit log(softmax) ever evaluates log(0).
        """
        X, W, b, y = self.X, self.W, self.b, self.y
        z = np.matmul(X, W) + b
        z_new = z - np.max(z, axis=1).reshape((z.shape[0], 1))
        ez_sum = np.sum(np.exp(z_new), axis=1).reshape((-1, 1))
        log_softmax = z_new - np.log(ez_sum)
        # Pick out each sample's log-probability for its true class.
        loss = -log_softmax[np.arange(y.shape[0]), y]
        return np.mean(loss)

    def _get_gradient(self):
        """Gradients of the mean cross-entropy w.r.t. W and b.

        return -> dj_dw (n, c), dj_db (c,)
        """
        X, y_hot = self.X, self.y_hot
        f_wb = self._softmax(np.matmul(X, self.W) + self.b)
        m = X.shape[0]
        err = f_wb - y_hot  # shared residual for both gradients
        dj_dw = np.matmul(X.T, err) / m
        dj_db = np.sum(err, axis=0) / m
        return dj_dw, dj_db

    def _gradient_descent(self, alpha, epochs):
        # 'alpha' is the learning rate (fixed comment typo 'alpa').
        for epoch in range(epochs):
            dj_dw, dj_db = self._get_gradient()
            self.W -= alpha * dj_dw
            self.b -= alpha * dj_db
            # Log cost on the first epoch, every 10th, and the last.
            if (epoch + 1) % 10 == 0 or epoch + 1 == epochs or epoch == 0:
                print("Epoch {}, cost: {}".format(epoch + 1, self._get_cost()))

    def optimize(self, alpha, epochs):
        """Train in place: run `epochs` steps of gradient descent at
        learning rate `alpha`."""
        self._gradient_descent(alpha=alpha, epochs=epochs)

    def predict(self, X):
        """Return the predicted class index for each row of X."""
        f_wb = self._softmax(np.matmul(X, self.W) + self.b)
        return np.argmax(f_wb, axis=1)

    def normalize_data(self):
        """Standardize self.X in place using a single global mean/std
        (scalar over all elements, not per-feature)."""
        X = self.X
        mu = np.mean(X)
        sigma = np.std(X)
        self.X = (X - mu) / sigma
        self.mu = mu
        self.sigma = sigma

    def get_normalizing_data(self):
        '''return -> mu, sigma'''
        return self.mu, self.sigma

    # Backward-compatible alias: the original public name had a typo
    # ("normailizing"); existing callers keep working.
    get_normailizing_data = get_normalizing_data