"""Log-linear (softmax) classifier implemented with NumPy (log_linear.py)."""
import numpy as np
from lxmls.deep_learning.utils import (
Model,
glorot_weight_init,
index2onehot,
logsumexp
)
class NumpyLogLinear(Model):
    """Log-linear (multinomial logistic regression) classifier in NumPy.

    The model computes class scores ``z = X . W^T + b``, normalizes them with
    a softmax evaluated in the log domain, and is trained by gradient descent
    using the analytic gradient of the batch-averaged negative
    log-likelihood.

    Attributes set by ``__init__``:
        weight: array returned by ``glorot_weight_init``. The rest of this
            class uses it as ``(num_classes, input_size)`` (it is transposed
            before being multiplied with the input).
        bias: array of zeros with shape ``(1, num_classes)``.
        learning_rate: step size used by ``update``.
    """

    def __init__(self, **config):
        """Create the model parameters.

        Args:
            **config: must contain the keys ``'input_size'``,
                ``'num_classes'`` and ``'learning_rate'``.

        Raises:
            KeyError: if any of the required keys is missing.
        """
        num_classes = config['num_classes']

        # The initializer is given (fan_in, fan_out); the array it returns is
        # used as (num_classes, input_size) everywhere below.
        # NOTE(review): confirm glorot_weight_init returns that layout.
        weight_shape = (config['input_size'], num_classes)
        # Initialization after Xavier Glorot et al.
        self.weight = glorot_weight_init(weight_shape, 'softmax')
        self.bias = np.zeros((1, num_classes))
        self.learning_rate = config['learning_rate']

    def log_forward(self, X):
        """Forward pass: return the log-probability of every class.

        Args:
            X: 2-D array with one example per row, ``(batch_size, input_size)``.

        Returns:
            Array of shape ``(batch_size, num_classes)`` whose rows are
            log-softmax values of the linear scores.
        """
        # Linear transformation; the bias row broadcasts over the batch.
        scores = np.dot(X, self.weight.T) + self.bias
        # Softmax in the log domain: subtracting the per-row log-sum-exp
        # normalizes the scores without exponentiating them first.
        return scores - logsumexp(scores, axis=1, keepdims=True)

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        return np.argmax(self.log_forward(X), axis=1)

    def update(self, X, y):
        """Apply one gradient-descent step computed on the batch ``(X, y)``.

        Args:
            X: 2-D array with one example per row, ``(batch_size, input_size)``.
            y: class indices for the examples, in whatever form
                ``index2onehot`` accepts.

        The parameters ``self.weight`` and ``self.bias`` are rebound to the
        updated arrays; nothing is returned.
        """
        # Probabilities of each class.
        class_probabilities = np.exp(self.log_forward(X))
        batch_size, num_classes = class_probabilities.shape

        # Error derivative at the softmax layer: (p - onehot(y)) / batch_size.
        # presumably index2onehot returns a (batch_size, num_classes) matrix;
        # the subtraction below relies on that.
        one_hot = index2onehot(y, num_classes)
        error = -(one_hot - class_probabilities) / batch_size

        # Weight gradient. error^T . X equals the sum over examples of
        # outer(error[i], X[i]) but is computed as one matrix product instead
        # of a Python-level loop over the batch.
        gradient_weight = np.dot(error.T, X)
        # Bias gradient: per-class error summed over the batch, kept 2-D so it
        # matches the (1, num_classes) bias.
        gradient_bias = np.sum(error, axis=0, keepdims=True)

        # Gradient-descent step.
        self.weight = self.weight - self.learning_rate * gradient_weight
        self.bias = self.bias - self.learning_rate * gradient_bias