|
1 | | -function [model, L] = mlpReg(X,Y,k,lambda) |
2 | | -% Train a multilayer perceptron neural network |
| 1 | +function [model, L] = mlpReg(X,y,k,lambda) |
| 2 | +% Train a multilayer perceptron neural network for regression with backpropagation |
| 3 | +% tanh activation function is used |
3 | 4 | % Input: |
4 | 5 | % X: d x n data matrix |
5 | | -% Y: p x n response matrix |
| 6 | +% y: p x n response matrix |
6 | 7 | % k: T x 1 vector to specify number of hidden nodes in each layer |
7 | 8 | % lambda: regularization parameter |
8 | 9 | % Output: |
9 | 10 | % model: model structure |
10 | | -% L: loss |
| 11 | +% L: (regularized least squares) loss |
11 | 12 | % Written by Mo Chen (sth4nth@gmail.com). |
12 | 13 | if nargin < 4 |
13 | 14 | lambda = 1e-2; |
14 | 15 | end |
15 | | -eta = 1e-3; |
| 16 | +eta = 1e-5; |
| 17 | +tol = 1e-5; |
16 | 18 | maxiter = 50000; |
17 | 19 | L = inf(1,maxiter); |
18 | 20 |
|
19 | | -k = [size(X,1);k(:);size(Y,1)]; |
| 21 | +k = [size(X,1);k(:);size(y,1)]; |
20 | 22 | T = numel(k)-1; |
21 | 23 | W = cell(T,1); |
22 | 24 | b = cell(T,1); |
|
30 | 32 | for iter = 2:maxiter |
31 | 33 | % forward |
32 | 34 | for t = 1:T-1 |
33 | | - Z{t+1} = tanh(W{t}'*Z{t}+b{t}); |
| 35 | + Z{t+1} = tanh(W{t}'*Z{t}+b{t}); % 5.10 5.113 |
34 | 36 | end |
35 | | - Z{T+1} = W{T}'*Z{T}+b{T}; |
| 37 | + Z{T+1} = W{T}'*Z{T}+b{T}; % 5.114 |
36 | 38 |
|
37 | 39 | % loss |
38 | | - E = Z{T+1}-Y; |
| 40 | + E = Z{T+1}-y; |
39 | 41 | Wn = cellfun(@(x) dot(x(:),x(:)),W); % |W|^2 |
40 | 42 | L(iter) = dot(E(:),E(:))+lambda*sum(Wn); |
41 | | - |
| 43 | + if abs(L(iter)-L(iter-1)) < tol*L(iter-1); break; end |
| 44 | + |
42 | 45 | % backward |
43 | | - R{T} = E; % delta |
| 46 | + R{T} = E; |
44 | 47 | for t = T-1:-1:1 |
45 | 48 | df = 1-Z{t+1}.^2; % h'(a) |
46 | | - R{t} = df.*(W{t+1}*R{t+1}); % delta |
| 49 | + R{t} = df.*(W{t+1}*R{t+1}); % 5.66 |
47 | 50 | end |
48 | 51 |
|
49 | 52 | % gradient descent |
50 | 53 | for t=1:T |
51 | | - dW = Z{t}*R{t}'+lambda*W{t}; |
| 54 | + dW = Z{t}*R{t}'+lambda*W{t}; % 5.67 |
52 | 55 | db = sum(R{t},2); |
53 | | - W{t} = W{t}-eta*dW; |
| 56 | + W{t} = W{t}-eta*dW; % 5.43 |
54 | 57 | b{t} = b{t}-eta*db; |
55 | 58 | end |
56 | 59 | end |
57 | | -L = L(1,2:iter); |
| 60 | +L = L(2:iter); |
58 | 61 | model.W = W; |
59 | 62 | model.b = b; |
0 commit comments