"""Functions to perform classification with Artificial Neural Networks.
The entry point is the function run_mlp_training().
You can either choose to use Mean Square Error (MSE) or Cross Entropy as the error function in train_mlp().
"""
import numpy as np
def softmax(x):
"""Compute the softmax values for each sets (rows) of scores in x.
Parameters
----------
x : np.ndarray(np.float32)
        A 2-D array containing, in each row, one image's correspondence scores for all labels.
Returns
-------
np.ndarray(np.float32)
The softmax activated scores.
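
    Examples
    --------
    A quick sanity check; each row of the result sums to 1:

    >>> softmax(np.array([[1.0, 2.0, 3.0]]))
    array([[0.09003057, 0.24472847, 0.66524096]])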
"""
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))  # subtract the row-wise max for numerical stability
    return exps / np.sum(exps, axis=1, keepdims=True)
def one_hot(labels):
"""Return the one-hot matrix of label.
Parameters
----------
labels : np.ndarray(np.int64)
An (n)-D array of labels.
Returns
-------
np.ndarray(np.float64)
The corresponding (n+1)-D one-hot matrix.
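
    Examples
    --------
    >>> one_hot(np.array([0, 2, 1]))
    array([[1., 0., 0.],
           [0., 0., 1.],
           [0., 1., 0.]])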
"""
dimensions = np.max(labels) + 1
hot_labels = np.eye(dimensions)[labels]
return hot_labels
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
"""Perform one gradient descent step using the Mean Square Error (MSE).
Parameters
----------
w1 : np.ndarray(np.float32)
The weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
        The bias matrix (1 x d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The bias matrix (1 x d_out) of the output layer.
data : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing training images in rows.
    targets : np.ndarray(np.int64)
        The vector (batch_size) of corresponding labels for each image.
learning_rate : float
The learning rate.
Returns
-------
w1 : np.ndarray(np.float32)
The updated weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
The updated bias matrix (1, d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The updated weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The updated bias matrix (1 x d_out) of the output layer.
    loss : float
        The value of the loss for one training epoch (computed before the update).
    accuracy : float
        The classification accuracy for one training epoch (computed before the update).
"""
    N = data.shape[0]
    # One-hot encode the integer labels so the targets match the network
    # output shape (batch_size x d_out): comparing sigmoid outputs directly
    # to raw integer labels would not be a meaningful MSE target.
    r_targets = one_hot(targets)
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (sigmoid activation function)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - r_targets))
    # Compute accuracy
    classification = np.argmax(predictions, axis=1)
    accuracy = np.count_nonzero(classification == targets) / len(targets)
    print("MSE loss before the gradient step:", loss)
    print("Accuracy:", accuracy)
    # Gradient computation (backward pass)
    dC_da2 = 2 * (a2 - r_targets) / N  # the 1/N batch-averaging factor enters here
    dC_dz2 = dC_da2 * (a2 - np.square(a2))  # sigmoid derivative: a2 * (1 - a2)
    dC_dw2 = np.matmul(a1.T, dC_dz2)
    dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)  # dC_dz2 is already averaged, no extra 1/N
    dC_da1 = np.matmul(dC_dz2, w2.T)
    dC_dz1 = dC_da1 * (a1 - np.square(a1))  # sigmoid derivative: a1 * (1 - a1)
    dC_dw1 = np.matmul(a0.T, dC_dz1)
    dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return (w1, b1, w2, b2, loss, accuracy)
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
"""Perform one gradient descent step using Cross Entropy (softmax activation function + one hot encoding of labels).
Parameters
----------
w1 : np.ndarray(np.float32)
The weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
        The bias matrix (1 x d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The bias matrix (1 x d_out) of the output layer.
data : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing training images in rows.
    labels_train : np.ndarray(np.int64)
        The vector (batch_size) of corresponding labels for each image.
learning_rate : float
The learning rate.
Returns
-------
w1 : np.ndarray(np.float32)
The updated weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
The updated bias matrix (1, d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The updated weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The updated bias matrix (1 x d_out) of the output layer.
    loss : float
        The value of the loss for one training epoch (computed before the update).
    accuracy : float
        The classification accuracy for one training epoch (computed before the update).
"""
N = data.shape[0]
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (softmax activation function)
a2 = softmax(z2)
predictions = a2 # the predicted values are the outputs of the output layer
targets_one_hot = one_hot(labels_train)
    # Compute loss (cross entropy): sum over classes, mean over the batch,
    # so the loss is consistent with the dC_dz2 = a2 - y gradient below
    loss = -np.sum(targets_one_hot * np.log(predictions)) / N
    # Compute accuracy
    classification = np.argmax(predictions, axis=1)
    accuracy = np.count_nonzero(classification == labels_train) / len(labels_train)
    print("Cross-entropy loss before the gradient step:", loss)
    print("Accuracy:", accuracy)
    # Gradient computation (backward pass)
    # With softmax outputs and cross-entropy loss,
    # $\frac{\partial C}{\partial Z^{(2)}} = A^{(2)} - Y$.
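    # Sketch of why this holds: the per-sample loss is
    # C_i = -sum_k y_ik * log(a2_ik) with a2 = softmax(z2); differentiating
    # the log-softmax and using sum_k y_ik = 1 collapses the terms to
    # dC_i/dz2 = a2 - y (the 1/N batch average is applied to dC_dw and dC_db below).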
dC_dz2 = a2 - targets_one_hot
dC_dw2 = np.matmul(a1.T, dC_dz2) / N
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True) / N
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * (a1 - np.square(a1))
dC_dw1 = np.matmul(a0.T, dC_dz1) / N
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True) / N
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return (w1, b1, w2, b2, loss, accuracy)
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
"""Perform num_epoch training steps with given learning_rate and loss function.
Parameters
----------
w1 : np.ndarray(np.float32)
The weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
        The bias matrix (1 x d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The bias matrix (1 x d_out) of the output layer.
data_train : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing training images in rows.
labels_train : np.ndarray(np.int64)
The vector (batch_size) of corresponding labels for each training image.
learning_rate : float
The learning rate.
num_epochs : int
        The number of training epochs to perform.
Returns
-------
w1 : np.ndarray(np.float32)
The updated weight matrix (d_in x d_h) of the first (hidden) layer after complete training.
b1 : np.ndarray(np.float32)
        The updated bias matrix (1 x d_h) of the first (hidden) layer after complete training.
w2 : np.ndarray(np.float32)
The updated weight matrix (d_h x d_out) of the output layer after complete training.
b2 : np.ndarray(np.float32)
The updated bias matrix (1 x d_out) of the output layer after complete training.
    train_accuracies : np.ndarray(np.float32)
        A vector containing, for each epoch, the training accuracy measured before that epoch's update.
    train_losses : np.ndarray(np.float32)
        A vector containing, for each epoch, the training loss measured before that epoch's update.
"""
train_accuracies = np.zeros((num_epochs, 1))
train_losses = np.zeros((num_epochs, 1))
for k in range(num_epochs):
print("Training MLP for epoch number :", k)
(w1, b1, w2, b2, loss, accuracy) = learn_once_cross_entropy(
w1, b1, w2, b2, data_train, labels_train, learning_rate
)
train_losses[k] = loss
train_accuracies[k] = accuracy
return (w1, b1, w2, b2, train_accuracies, train_losses)
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
"""Test the trained network on the test set.
Parameters
----------
w1 : np.ndarray(np.float32)
The weight matrix (d_in x d_h) of the first (hidden) layer.
b1 : np.ndarray(np.float32)
        The bias matrix (1 x d_h) of the first (hidden) layer.
w2 : np.ndarray(np.float32)
The weight matrix (d_h x d_out) of the output layer.
b2 : np.ndarray(np.float32)
The bias matrix (1 x d_out) of the output layer.
data_test : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing test images in rows.
    labels_test : np.ndarray(np.int64)
        The vector (batch_size) of corresponding labels for each image.
Returns
-------
float
The testing accuracy.
"""
    # Forward pass
    a0 = data_test  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    # output of the hidden layer (sigmoid activation function)
    a1 = 1 / (1 + np.exp(-z1))
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    # output of the output layer; softmax mirrors the training forward pass,
    # and the argmax below is unaffected by this monotone rescaling
    a2 = softmax(z2)
    predictions = a2  # the predicted values are the outputs of the output layer
    classification = np.argmax(predictions, axis=1)
    nb_labels = len(labels_test)
    nb_well_classified = np.count_nonzero(classification == labels_test)
    accuracy = nb_well_classified / nb_labels
return accuracy
def run_mlp_training(
data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs
):
"""Train an MLP classifier and test the trained network (weights and biases) on the test set.
Parameters
----------
    data_train : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing training images in rows.
    labels_train : np.ndarray(np.int64)
        The vector (batch_size) of corresponding labels for each training image.
    data_test : np.ndarray(np.float32)
        The input matrix (batch_size x d_in) containing test images in rows.
    labels_test : np.ndarray(np.int64)
        The vector (batch_size) of corresponding labels for each test image.
d_h : int
The number of neurons in the hidden layer.
learning_rate : float
The learning rate.
num_epochs : int
        The number of training epochs to perform.
Returns
-------
    train_accuracies : np.ndarray(np.float32)
        A vector containing, for each epoch, the training accuracy measured before that epoch's update.
    train_losses : np.ndarray(np.float32)
        A vector containing, for each epoch, the training loss measured before that epoch's update.
    final_accuracy : float
        The testing accuracy.
"""
    d_in = np.shape(data_train)[1]
    d_out = 10  # number of classes
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights, uniform in [-1, 1)
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights, uniform in [-1, 1)
    b2 = np.zeros((1, d_out))  # second layer biases
(w1, b1, w2, b2, train_accuracies, train_losses) = train_mlp(
w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs
)
final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return (train_accuracies, train_losses, final_accuracy)
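

if __name__ == "__main__":
    # Minimal smoke test on synthetic random data: a sketch, where the shapes
    # and hyperparameters are illustrative and the 10 classes simply match
    # the d_out hard-coded in run_mlp_training(); no real dataset is assumed.
    rng = np.random.default_rng(0)
    n_train, n_test, d_in, n_classes = 64, 32, 48, 10
    data_train = rng.random((n_train, d_in)).astype(np.float32)
    labels_train = np.arange(n_train) % n_classes  # every class represented
    data_test = rng.random((n_test, d_in)).astype(np.float32)
    labels_test = np.arange(n_test) % n_classes

    # Exercise one MSE gradient step directly.
    d_h = 16
    w1 = 2 * np.random.rand(d_in, d_h) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * np.random.rand(d_h, n_classes) - 1
    b2 = np.zeros((1, n_classes))
    learn_once_mse(w1, b1, w2, b2, data_train, labels_train, learning_rate=0.1)

    # Full training run with the cross-entropy loss, then evaluation.
    train_accuracies, train_losses, final_accuracy = run_mlp_training(
        data_train, labels_train, data_test, labels_test,
        d_h=d_h, learning_rate=0.1, num_epochs=5,
    )
    print("Final test accuracy on random data:", final_accuracy)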