# Oja-Plasticity-Feedforward/main.py at trunk · NeuralDynamicsAndComputing/Oja-Plasticity-Feedforward · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
import os
import copy
import torch
import pickle
import argparse
import warnings
import datetime
import numpy as np

from torch import nn
from random import randrange
from torch.nn.utils import stateless
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler

from optim import MyOptimizer, generic_rule
from utils import log, measure_angle, stats, Plot

# -- install a process-wide filter that ignores every warning of category UserWarning
warnings.simplefilter(action='ignore', category=UserWarning)


class MyModel(nn.Module):
    def __init__(self, dim, in_channel, out_dim, hidden_dims):
        """
            Build the fully connected classifier.

        The network is a stack of bias-free linear layers. Modules follow a
        naming convention:
        - 'fd<k>': forward pathway connections (k = 1 ... len(hidden_dims) + 1),
        - 'fk<k>': feedback pathway connections, created on demand by
          :meth:`MyModel.add_feedback_pathways`.

        .. note::
            Layers placed before a max-pooling stage are expected to expose a
            :attr:`pool_setting` attribute holding (kernel, stride, padding).

            Example::
                self.mxp = nn.MaxPool2d(2, stride=2)
                self.fdx = nn.Conv2d(6, 6, 3, padding=1, bias=False)
                self.fdx.pool_setting = (self.mxp.kernel_size, self.mxp.stride,
                                         self.mxp.padding)

        :param dim: (int) Height/width of the (square) input image.
        :param in_channel: (int) Number of input channels.
        :param out_dim: (int) Output dimension (width of the last layer).
        :param hidden_dims: (sequence of int) Width of every hidden layer, in
            order from input to output.
        """
        super().__init__()

        self.dim = dim
        self.L = len(hidden_dims)

        # -- activation functions
        self.Beta = 10
        self.sopl = nn.Softplus(beta=self.Beta)

        # -- forward pathway
        # Consecutive entries of `widths` are the (in, out) sizes of one layer;
        # the first entry is the size of the flattened input image.
        widths = [in_channel * dim * dim, *hidden_dims, out_dim]
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fd{idx}", nn.Linear(fan_in, fan_out, bias=False))

    def add_feedback_pathways(self):
        """
            Attach feedback pathways to the model.

        Every child module whose name contains 'fd' is deep-copied and the
        copy is registered as 'fk<k>', where k counts the forward layers from
        one in registration order.

        :return: None
        """
        # -- feedback pathways
        # Snapshot the forward layers first so the children registered below
        # are not visited while iterating.
        forward_layers = [module for name, module in self.named_children() if 'fd' in name]
        for idx, module in enumerate(forward_layers, start=1):
            setattr(self, f"fk{idx}", copy.deepcopy(module))

    def forward(self, x):
        """
            Propagate a batch through the forward pathway.

        The input is flattened to (batch, -1). Each hidden 'fd' layer is
        followed by the softplus non-linearity; the last 'fd' layer
        (index ``L + 1``) produces the raw logits.

        :param x: (torch.Tensor) input images.
        :return: tuple: (activations, logits) where activations is a tuple
            holding the flattened input followed by every hidden activation.
        """
        activations = [x.view(x.shape[0], -1)]
        output_tag = str(self.L + 1)

        for name, layer in self.named_children():
            # skip everything that is not a forward connection (e.g. 'sopl', 'fk*')
            if 'fd' not in name:
                continue

            pre_activation = layer(activations[-1])
            if output_tag in name:
                logit = pre_activation
            else:
                activations.append(self.sopl(pre_activation))

        return tuple(activations), logit


class Train:
    """
        Training driver for the feedforward classifier.

    Builds the data loaders, the model (including its feedback pathways), the
    plasticity-rule optimizer and the logging helpers from the command-line
    arguments, and runs the training loop in :meth:`train`.
    """

    def __init__(self, args):
        """
            Initialize the Train object.

        :param args: (argparse.Namespace) The command-line arguments.
        :raises ValueError: if no data directory is found or if
            ``args.database`` is not one of 'CIFAR10', 'FashionMNIST', 'MNIST'.
        """
        # -- set seeds
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

        # -- data settings
        transform = transforms.Compose([transforms.Resize((args.dim, args.dim)), transforms.ToTensor()])

        if os.path.exists('../../data'):
            data_dir = '../../data'
        elif os.path.exists('../MetaPlasticity/data'):
            data_dir = '../MetaPlasticity/data'
        else:
            raise ValueError('data directory not found')

        # Datasets are never downloaded here (download=False): they must
        # already be present in `data_dir`.
        if args.database == 'CIFAR10':
            in_channel = 3
            dataset_train = datasets.CIFAR10(data_dir, train=True, download=False, transform=transform)
            dataset_test = datasets.CIFAR10(data_dir, train=False, download=False, transform=transform)
        elif args.database == 'FashionMNIST':
            in_channel = 1
            dataset_train = datasets.FashionMNIST(data_dir, train=True, download=False, transform=transform)
            dataset_test = datasets.FashionMNIST(data_dir, train=False, download=False, transform=transform)
        elif args.database == 'MNIST':
            in_channel = 1
            dataset_train = datasets.MNIST(data_dir, train=True, download=False, transform=transform)
            dataset_test = datasets.MNIST(data_dir, train=False, download=False, transform=transform)
        else:
            raise ValueError(f'Unknown database: {args.database}')

        # Draw `n_train` / `n_test` distinct sample indices without replacement.
        # NOTE(review): the index pools are hard-coded to 50000 / 10000. The
        # MNIST and FashionMNIST training sets hold 60000 images, so their
        # last 10000 samples can never be drawn -- confirm this is intended.
        train_sampler = SubsetRandomSampler(np.random.choice(range(50000), args.n_train, False))
        self.train_loader = DataLoader(dataset_train, batch_size=args.batch_size, sampler=train_sampler)
        test_sampler = SubsetRandomSampler(np.random.choice(range(10000), args.n_test, False))
        # the whole test subset is served as one single batch
        self.test_loader = DataLoader(dataset_test, batch_size=args.n_test, sampler=test_sampler)

        # todo: #-- processor params
        self.device = args.device

        # -- load model
        self.init_method = args.init_method
        self.model = self.load_model(args.dim, in_channel, hidden_dims=args.hidden_dims).to(self.device)

        # -- training settings
        self.epochs = args.epochs

        # -- optim settings
        self.fbk = args.fbk
        self.OptimAdpt = MyOptimizer(generic_rule, args.vec, self.fbk, args.Theta)
        self.loss_func = nn.CrossEntropyLoss()
        self.vec = args.vec
        self.Theta = args.Theta

        # -- logging settings
        self.res_dir = args.res_dir
        self.test_name = args.test_name
        self.plot = Plot(self.res_dir)

    def load_model(self, dim, in_channel, hidden_dims):
        """
            Load classifier model

        Creates the classifier network, adds its feedback pathways and
        annotates every parameter with the grad computation flag and the
        convolution and pooling attributes of its owning module. For module
        naming conventions see :meth:`MyModel.__init__`.

        :param dim: (int) Height/width of the (square) input image.
        :param in_channel: (int) Number of input channels.
        :param hidden_dims: (sequence of int) Width of every hidden layer.
        :return: model
        """
        # -- init model
        model = MyModel(dim, in_channel, out_dim=10, hidden_dims=hidden_dims)
        model.add_feedback_pathways()

        # -- set param attributes
        for key, mod in model.named_children():
            for param in mod.parameters():
                param.classname = mod.__class__.__name__
                # feedback ('fk') weights are excluded from autograd
                param.requires_grad = 'fk' not in key
                # read from the instance __dict__, so these are None for
                # modules that do not define the attribute
                param.out_channels = mod.__dict__.get('out_channels', None)
                param.padding = mod.__dict__.get('padding', None)
                param.pool_setting = mod.__dict__.get('pool_setting', None)

        return model

    def reinitialize(self):
        """
            Initialize module parameters.

        Initializes and clones the model parameters, creating a separate copy
        of the data in new memory. This duplication enables the modification
        of the parameters using inplace operations.

        :return: dict: module parameters, keyed by their qualified name, each
            carrying the ``classname``, ``padding``, ``out_channels`` and
            ``pool_setting`` attributes of the original parameter.
        """

        # -- initialize params
        for _, m in self.model.named_modules():
            if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d)):
                if self.init_method == 'naive':
                    torch.nn.init.normal_(m.weight, 0, 0.01)
                else:
                    # in-place variant; the un-suffixed `xavier_uniform`
                    # alias is deprecated
                    torch.nn.init.xavier_uniform_(m.weight)

        # -- enforce symmetric feedback initialization when backprop is training
        if self.fbk == 'sym':
            n_layers = len([name for name, _ in self.model.named_children() if 'fd' in name])
            for i in range(1, n_layers + 1):
                getattr(self.model, f'fk{i}').weight.data = getattr(self.model, f'fd{i}').weight.data

        # -- clone params
        named_parameters = dict(self.model.named_parameters())
        params = {key: val.clone() for key, val in named_parameters.items() if '.' in key}
        # `clone` does not carry over the custom attributes set in `load_model`
        for key in params:
            params[key].__dict__.update({
                'classname': named_parameters[key].classname,
                'padding': named_parameters[key].padding,
                'out_channels': named_parameters[key].out_channels,
                'pool_setting': named_parameters[key].pool_setting})

        return params

    def train(self):
        """
            Perform training.

        For every training batch: run a gradient-free forward pass with the
        cloned parameters, let the plasticity optimizer update them, then
        compute, print and log loss and accuracy. The plot helper is invoked
        once after the last epoch.

        :return: None
        """
        self.model.train()

        # -- initialize
        params = self.reinitialize()

        # -- train
        for ep_idx in range(0, self.epochs):

            for bc_idx, (x, label) in enumerate(self.train_loader):

                x, label = x.to(self.device), label.to(self.device)

                # -- predict
                with torch.no_grad():
                    y, logits = stateless.functional_call(self.model, params, x)

                # -- update network params
                self.OptimAdpt(params, logits, label, y, self.model.Beta)

                # -- stats
                # the last argument flags the final batch of the epoch
                loss, acc = stats(self.test_loader, self.train_loader, self.model, self.loss_func, self.res_dir, params,
                                  self.model.Beta, bc_idx, self.Theta, self.vec, self.fbk, x, label,
                                  bc_idx == len(self.train_loader)-1)

                print(f'Iteration {bc_idx}: loss = {loss}, acc = {acc} ')

                # `.cpu()` is required before `.numpy()` when the loss lives on
                # a CUDA device and is a no-op for CPU tensors
                log([loss.detach().cpu().numpy()], f'{self.res_dir}/loss.txt')
                log([acc], f'{self.res_dir}/acc.txt')

        self.plot()


def parse_args():
    """
        Parses the input arguments for the classifier training script.

    The function creates an argument parser with model, processor, training,
    plasticity and log parameters. After parsing the input arguments, the
    function derives the compute device (``args.device``) and a unique
    results directory (``args.res_dir``) and returns the validated input
    arguments using :func:`check_args`.

    ``--hidden_dims`` is mandatory: the model needs at least the list of
    hidden layer widths, and a missing value would otherwise only surface
    later as a ``TypeError`` while the model is being built.

    :return: args: (argparse.Namespace) The validated input arguments for the
    model.
    """
    desc = "Pytorch implementation of feedback alignment."
    parser = argparse.ArgumentParser(description=desc)

    # -- set model params
    parser.add_argument('--hidden_dims', type=int, nargs='+', required=True, help='List of hidden dimensions')

    # -- set processor params
    parser.add_argument('--gpu_mode', type=int, default=1, help='Accelerate the script using GPU.')

    # -- set training params
    parser.add_argument('--seed', type=int, default=5, help='Random seed.')
    parser.add_argument('--epochs', type=int, default=1, help='Number of training epochs.')
    parser.add_argument('--batch_size', type=int, default=1, help='Number of data points per training batch.')
    parser.add_argument('--n_train', type=int, default=5110, help='.')
    parser.add_argument('--n_test', type=int, default=100, help='.')
    parser.add_argument('--database', type=str, default='CIFAR10', help='Training database.')
    parser.add_argument('--dim', type=int, default=32, help='.')
    parser.add_argument('--init_method', type=str, default='xavier', help='Weight initialization method.')

    # -- set plasticity rules
    parser.add_argument('--fbk', type=str, default='sym',
                        help='Feedback connection type: 1) sym = Symmetric feedback; 2) fix = Fixed random feedback.')
    # values arrive as strings (no `type=` given)
    parser.add_argument('--vec', nargs='*', default=[],
                        help='Index vector specifying the plasticity terms to be used for model training in '
                             'adaptation.')

    # -- set plasticity params
    # values arrive as strings; :func:`check_args` converts them to floats
    parser.add_argument('--Theta', nargs='*', default=[0.01], help='Plasticity hyper-parameters.')

    # -- set save directory
    parser.add_argument('--test_name', type=str, default='', help='.')
    parser.add_argument('--test_sub_name', type=str, default='', help='.')

    args = parser.parse_args()

    # -- GPU settings
    # fall back to the CPU when CUDA is requested but unavailable
    use_cuda = bool(args.gpu_mode) and torch.cuda.is_available()
    args.device = torch.device('cuda' if use_cuda else 'cpu')

    # -- set results directory
    # timestamp plus a random suffix in [0, 80) to keep runs started within
    # the same second apart
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    args.res_dir = f'./results/{args.test_name}/{args.test_sub_name}/{timestamp}_{randrange(80)}'

    return check_args(args)


def check_args(args):
    """
        Check validity of the input arguments.

    This function creates the results directory, converts every entry of
    ``args.Theta`` to ``float`` and stores the settings by writing them to a
    file named `args.txt` in the `res_dir` directory specified in the input
    arguments (one ``name : value`` line per argument).

    :param args: (argparse.Namespace) The command-line arguments.
    :return: args: (argparse.Namespace) The validated input arguments.
    :raises ValueError: if an entry of ``args.Theta`` is a string that does
        not represent a float.
    """
    # -- create results directory
    # `exist_ok=True` removes the check-then-create race between runs that
    # are launched in parallel.
    os.makedirs(args.res_dir, exist_ok=True)

    # -- turn thetas into floats
    # Reference values from earlier experiments:
    #   FashionMNIST F_Oja test:    theta_0, theta_9, theta_2 = 0.005  , -0.00   , 0.0007
    #   FashionMNIST F0 test:       theta_0, theta_9, theta_2 = 0.0018 , -0.00   , 0.
    #   FashionMNIST F_BP test:     theta_0, theta_9, theta_2 = 0.0119 , -0.00   , 0.
    #   MNIST F_BP test:            theta_0, theta_9, theta_2 = 0.0119 , -0.00   , 0.
    #   MNIST F_0 test:             theta_0, theta_9, theta_2 = 0.0029 , -0.00   , 0.
    #   MNIST F_eHebb test:         theta_0, theta_9, theta_2 =
    #   MNIST F_Oja test:           theta_0, theta_9, theta_2 =
    #   MNIST F_bio test:           theta_0, theta_9, theta_2 = 0.011  , -0.05   , -0.005
    #   MNIST DFA F0 test:          theta_0, theta_9, theta_2 = 0.0021 , -0.00   , 0.
    #   F_FA:                       [0.08, -0.00, 0.]
    #   F_Oja:                      [0.01, 0, -0.0040]
    args.Theta = [float(item) for item in args.Theta]

    # -- store settings
    with open(f'{args.res_dir}/args.txt', 'w') as fp:
        fp.writelines(f'{key} : {value}\n' for key, value in vars(args).items())

    return args


def main():
    """
        Entry point for training the classifier.

    Parses and validates the command-line arguments, builds a
    :class:`Train` instance from them and runs its training loop.

    :return: None
    """
    # -- load arguments, build the trainer and train
    Train(parse_args()).train()


# -- start training only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()