lxmls-toolkit/lxmls/deep_learning/numpy_models/log_linear.py at 7d0cc2452291c815b6ef753b6cdf49bd8bf30ea0 · LxMLS/lxmls-toolkit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import numpy as np
from lxmls.deep_learning.utils import (
    Model,
    glorot_weight_init,
    index2onehot,
    logsumexp
)


class NumpyLogLinear(Model):
    """Log-linear classifier implemented with NumPy.

    Each input row goes through one affine transformation followed by a
    softmax computed in the log domain. Parameters are trained with plain
    gradient steps on the batch passed to ``update``.
    """

    def __init__(self, **config):
        """Build the model parameters from keyword configuration.

        Keys read from ``config``:
            input_size: number of input features.
            num_classes: number of output classes.
            learning_rate: step size applied in ``update``.

        Raises:
            KeyError: if one of the keys above is missing.
        """

        # Initialize parameters
        weight_shape = (config['input_size'], config['num_classes'])
        # after Xavier Glorot et al
        # NOTE(review): log_forward multiplies by weight.T and update builds
        # a (num_classes, input_size) gradient, so glorot_weight_init is
        # presumably returning the weight laid out as
        # (num_classes, input_size) -- confirm against its definition.
        self.weight = glorot_weight_init(weight_shape, 'softmax')
        self.bias = np.zeros((1, config['num_classes']))
        self.learning_rate = config['learning_rate']

    def log_forward(self, X):
        """Forward pass of the computation graph.

        Args:
            X: 2-D array with one example per row.

        Returns:
            Array with one row per example holding the log of the softmax
            over the class scores.
        """

        # Linear transformation
        z = np.dot(X, self.weight.T) + self.bias

        # Softmax implemented in log domain: subtracting the row-wise
        # log-sum-exp normalizes each row without exponentiating first.
        log_tilde_z = z - logsumexp(z, axis=1, keepdims=True)

        return log_tilde_z

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        return np.argmax(self.log_forward(X), axis=1)

    def update(self, X, y):
        """Apply one gradient descent step computed on the batch (X, y).

        Args:
            X: 2-D array with one example per row.
            y: class labels for the rows of ``X``, in whatever form
                ``index2onehot`` accepts.

        Returns:
            None. ``self.weight`` and ``self.bias`` are rebound to the
            updated parameter arrays.
        """

        # Probabilities of each class
        class_probabilities = np.exp(self.log_forward(X))
        batch_size, num_classes = class_probabilities.shape

        # Error derivative at softmax layer, averaged over the batch
        # (assumes index2onehot returns a (batch_size, num_classes) array)
        one_hot = index2onehot(y, num_classes)
        error = - (one_hot - class_probabilities) / batch_size

        # Weight gradient: the sum over examples of
        # outer(error[n], X[n]) is exactly the matrix product error.T @ X,
        # so compute it in one call instead of a Python-level loop.
        gradient_weight = np.dot(error.T, X)

        # Bias gradient
        gradient_bias = np.sum(error, axis=0, keepdims=True)

        # SGD update
        self.weight = self.weight - self.learning_rate * gradient_weight
        self.bias = self.bias - self.learning_rate * gradient_bias