-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
391 lines (306 loc) · 14.7 KB
/
main.py
File metadata and controls
391 lines (306 loc) · 14.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
import os
import copy
import torch
import pickle
import argparse
import warnings
import datetime
import numpy as np
from torch import nn
from random import randrange
from torch.nn.utils import stateless
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from optim import MyOptimizer, generic_rule
from utils import log, measure_angle, stats, Plot
# -- silence every warning of category UserWarning (and its subclasses) for
#    the whole process; other warning categories are not affected by this call
warnings.simplefilter(action='ignore', category=UserWarning)
class MyModel(nn.Module):
    """Bias-free fully connected network with named forward/feedback layers."""

    def __init__(self, dim, in_channel, out_dim, hidden_dims):
        """
            Initialize MyModel object.

        Builds the activation function and a chain of bias-free linear layers
        that maps the flattened input to the output. Module names follow a
        convention:

        - 'fd<i>': forward pathway connections,
        - 'fk<i>': feedback pathway connections (created separately by
          :meth:`MyModel.add_feedback_pathways`).

        .. note::
            Pre max-pooling layers should always include :attr:`pool_setting`
            containing (kernel, stride, padding).
            Example::

                self.mxp = nn.MaxPool2d(2, stride=2)
                self.fdx = nn.Conv2d(6, 6, 3, padding=1, bias=False)
                self.fdx.pool_setting = (self.mxp.kernel_size, self.mxp.stride,
                                         self.mxp.padding)

        :param dim: (int) Input dimension; the first layer expects
            ``in_channel * dim * dim`` flattened features.
        :param in_channel: (int) Number of input channels.
        :param out_dim: (int) Output dimension.
        :param hidden_dims: (list of int) Width of each hidden layer, in
            order; may be empty.
        """
        super().__init__()

        self.dim = dim
        self.L = len(hidden_dims)  # number of hidden layers

        # -- activation functions
        self.Beta = 10
        self.sopl = nn.Softplus(beta=self.Beta)

        # -- forward pathway: fd1 ... fd{L} are hidden layers, fd{L+1} is the
        #    output layer. Layers are registered in this order, after `sopl`.
        widths = [in_channel * dim * dim, *hidden_dims, out_dim]
        for idx, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fd{idx}", nn.Linear(n_in, n_out, bias=False))

    def add_feedback_pathways(self):
        """
            Add feedback pathways to the model.

        Deep-copies every child module whose name contains 'fd' and registers
        the copy as 'fk<i>', where <i> is the 1-based position of the forward
        layer among those children.

        :return: None
        """
        # -- feedback pathways (snapshot the forward layers before adding
        #    new children to the module)
        forward_layers = [module for name, module in self.named_children() if 'fd' in name]
        for idx, forward_layer in enumerate(forward_layers, start=1):
            setattr(self, f"fk{idx}", copy.deepcopy(forward_layer))

    def forward(self, x):
        """
            Perform forward pass of information.

        Flattens the input per sample, applies the hidden layers
        'fd1' ... 'fd{L}' each followed by Softplus, and finally the output
        layer 'fd{L+1}' without activation.

        :param x: (torch.Tensor) input images.
        :return: tuple: ``(activations, logits)`` where ``activations`` is a
            tuple holding the flattened input followed by the output of every
            hidden layer, and ``logits`` is the output layer's prediction.
        """
        activations = [x.view(x.shape[0], -1)]

        # -- hidden layers
        for idx in range(1, self.L + 1):
            hidden_layer = getattr(self, f"fd{idx}")
            activations.append(self.sopl(hidden_layer(activations[-1])))

        # -- output layer
        output_layer = getattr(self, f"fd{self.L + 1}")
        logit = output_layer(activations[-1])

        return tuple(activations), logit
class Train:
    """Train a :class:`MyModel` classifier with a plasticity-rule optimizer."""

    def __init__(self, args):
        """
            Initialize the Train object.

        Seeds NumPy and PyTorch, builds the train/test data loaders, creates
        the model and the optimizer, and sets up logging/plotting.

        :param args: (argparse.Namespace) The command-line arguments.
        :raises ValueError: if no data directory is found or if
            ``args.database`` is not one of the supported databases.
        """
        # -- set seeds
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

        # -- data settings
        transform = transforms.Compose([transforms.Resize((args.dim, args.dim)), transforms.ToTensor()])

        if os.path.exists('../../data'):
            data_dir = '../../data'
        elif os.path.exists('../MetaPlasticity/data'):
            data_dir = '../MetaPlasticity/data'
        else:
            raise ValueError('data directory not found')

        # database name -> (torchvision dataset class, number of input channels)
        databases = {
            'CIFAR10': (datasets.CIFAR10, 3),
            'FashionMNIST': (datasets.FashionMNIST, 1),
            'MNIST': (datasets.MNIST, 1),
        }
        if args.database not in databases:
            raise ValueError(f'Unknown database: {args.database}')
        dataset_cls, in_channel = databases[args.database]
        dataset_train = dataset_cls(data_dir, train=True, download=False, transform=transform)
        dataset_test = dataset_cls(data_dir, train=False, download=False, transform=transform)

        # -- draw fixed random subsets (without replacement) of the data.
        # NOTE(review): the index pools (50000 train / 10000 test) are
        # hard-coded; presumably sized for CIFAR10 - confirm this is intended
        # for the other databases. Kept unchanged so that seeded subsets stay
        # reproducible.
        train_sampler = SubsetRandomSampler(np.random.choice(range(50000), args.n_train, False))
        self.train_loader = DataLoader(dataset_train, batch_size=args.batch_size, sampler=train_sampler)
        test_sampler = SubsetRandomSampler(np.random.choice(range(10000), args.n_test, False))
        # the whole test subset is served as a single batch
        self.test_loader = DataLoader(dataset_test, batch_size=args.n_test, sampler=test_sampler)

        # todo: #-- processor params
        self.device = args.device

        # -- load model
        self.init_method = args.init_method
        self.model = self.load_model(args.dim, in_channel, hidden_dims=args.hidden_dims).to(self.device)

        # -- training settings
        self.epochs = args.epochs

        # -- optim settings
        self.fbk = args.fbk
        self.OptimAdpt = MyOptimizer(generic_rule, args.vec, self.fbk, args.Theta)
        self.loss_func = nn.CrossEntropyLoss()
        self.vec = args.vec
        self.Theta = args.Theta

        # -- logging settings
        self.res_dir = args.res_dir
        self.test_name = args.test_name
        self.plot = Plot(self.res_dir)

    def load_model(self, dim, in_channel, hidden_dims):
        """
            Load classifier model

        Builds the classifier network with a 10-dimensional output, adds its
        feedback pathways and annotates every parameter with attributes of
        the module that owns it. Parameters of feedback ('fk') modules get
        ``requires_grad = False``. For module naming conventions see
        :meth:`MyModel.__init__`.

        :param dim: (int) Input dimension.
        :param in_channel: (int) Number of input channels.
        :param hidden_dims: (list of int) Width of each hidden layer.
        :return: model
        """
        # -- init model
        model = MyModel(dim, in_channel, out_dim=10, hidden_dims=hidden_dims)
        model.add_feedback_pathways()

        # -- set param attributes
        for key, mod in model.named_children():
            for param in mod.parameters():
                param.classname = mod.__class__.__name__
                param.requires_grad = 'fk' not in key
                # attributes the module does not define are recorded as None
                param.out_channels = mod.__dict__.get('out_channels', None)
                param.padding = mod.__dict__.get('padding', None)
                param.pool_setting = mod.__dict__.get('pool_setting', None)

        return model

    def reinitialize(self):
        """
            Initialize module parameters.

        Re-initializes the weights of every Linear/Conv2d module and returns
        clones of the model parameters, i.e. separate copies of the data in
        new memory. This duplication enables the modification of the
        parameters using inplace operations. The attributes set in
        :meth:`Train.load_model` are copied onto the clones.

        :return: dict: module parameters, keyed by parameter name.
        """
        # -- initialize params
        for m in self.model.modules():
            if isinstance(m, (torch.nn.Linear, torch.nn.Conv2d)):
                if self.init_method == 'naive':
                    torch.nn.init.normal_(m.weight, 0, 0.01)
                else:
                    # in-place initializer; the variant without the trailing
                    # underscore is a deprecated alias
                    torch.nn.init.xavier_uniform_(m.weight)

        # -- enforce symmetric feedback initialization when backprop is training
        if self.fbk == 'sym':
            n_layers = sum('fd' in name for name, _ in self.model.named_children())
            for i in range(1, n_layers + 1):
                # copy the values rather than sharing storage, so the forward
                # and feedback layers of the model remain independent tensors
                getattr(self.model, f'fk{i}').weight.data = getattr(self.model, f'fd{i}').weight.data.clone()

        # -- clone params
        named_parameters = dict(self.model.named_parameters())
        params = {key: val.clone() for key, val in named_parameters.items() if '.' in key}
        for key, cloned in params.items():
            source = named_parameters[key]
            cloned.__dict__.update({
                'classname': source.classname,
                'padding': source.padding,
                'out_channels': source.out_channels,
                'pool_setting': source.pool_setting})

        return params

    def train(self):
        """
            Perform training.

        For every batch: runs a forward pass with the cloned parameters,
        lets the optimizer update them, evaluates statistics, prints them and
        appends loss/accuracy to ``loss.txt``/``acc.txt`` in the results
        directory. Finally calls the plotter.

        :return: None
        """
        self.model.train()

        # -- initialize
        params = self.reinitialize()

        # -- train
        n_batches = len(self.train_loader)
        for ep_idx in range(self.epochs):
            for bc_idx, (x, label) in enumerate(self.train_loader):
                x, label = x.to(self.device), label.to(self.device)

                # -- predict
                # NOTE(review): no_grad is assumed to cover the forward call
                # only - confirm against the optimizer's expectations.
                with torch.no_grad():
                    y, logits = stateless.functional_call(self.model, params, x)

                # -- update network params
                self.OptimAdpt(params, logits, label, y, self.model.Beta)

                # -- stats (the last argument flags the final batch of the epoch)
                loss, acc = stats(self.test_loader, self.train_loader, self.model, self.loss_func, self.res_dir, params,
                                  self.model.Beta, bc_idx, self.Theta, self.vec, self.fbk, x, label,
                                  bc_idx == n_batches - 1)

                print(f'Iteration {bc_idx}: loss = {loss}, acc = {acc} ')
                # move to host memory first: Tensor.numpy() only works on CPU tensors
                log([loss.detach().cpu().numpy()], f'{self.res_dir}/loss.txt')
                log([acc], f'{self.res_dir}/acc.txt')

        # NOTE(review): plotting is assumed to run once, after all epochs - confirm.
        self.plot()
def parse_args():
    """
        Parses the input arguments for the feedback-alignment classifier.

    The function creates an argument parser with model, processor, training,
    plasticity and log parameters. After parsing the input arguments, the
    function selects the compute device, builds a time-stamped results
    directory name and returns the validated input arguments using
    :func:`check_args`.

    ``--hidden_dims`` is mandatory: the model is built from it, so a missing
    value is rejected here with a usage error instead of failing later.

    :return: args: (argparse.Namespace) The validated input arguments for the
        model.
    """
    desc = "Pytorch implementation of feedback alignment."
    parser = argparse.ArgumentParser(description=desc)

    # -- set model params
    parser.add_argument('--hidden_dims', type=int, nargs='+', required=True, help='List of hidden dimensions')

    # -- set processor params
    parser.add_argument('--gpu_mode', type=int, default=1, help='Accelerate the script using GPU.')

    # -- set training params
    parser.add_argument('--seed', type=int, default=5, help='Random seed.')
    parser.add_argument('--epochs', type=int, default=1, help='Number of training epochs.')
    parser.add_argument('--batch_size', type=int, default=1, help='Number of data points per training batch.')
    parser.add_argument('--n_train', type=int, default=5110, help='.')
    parser.add_argument('--n_test', type=int, default=100, help='.')
    parser.add_argument('--database', type=str, default='CIFAR10', help='Training database.')
    parser.add_argument('--dim', type=int, default=32, help='.')
    parser.add_argument('--init_method', type=str, default='xavier', help='Weight initialization method.')

    # -- set plasticity rules
    parser.add_argument('--fbk', type=str, default='sym',
                        help='Feedback connection type: 1) sym = Symmetric feedback; 2) fix = Fixed random feedback.')
    parser.add_argument('--vec', nargs='*', default=[],
                        help='Index vector specifying the plasticity terms to be used for model training in '
                             'adaptation.')

    # -- set plasticity params (values arrive as strings; check_args converts them to floats)
    parser.add_argument('--Theta', nargs='*', default=[0.01], help='Plasticity hyper-parameters.')

    # -- set save directory
    parser.add_argument('--test_name', type=str, default='', help='.')
    parser.add_argument('--test_sub_name', type=str, default='', help='.')

    args = parser.parse_args()

    # -- GPU settings: use CUDA only when requested and available
    args.device = torch.device('cuda' if (bool(args.gpu_mode) and torch.cuda.is_available()) else 'cpu')

    # -- set results directory: time stamp plus a random suffix in [0, 80)
    args.res_dir = f'./results/{args.test_name}/{args.test_sub_name}/' \
                   f'{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}_{str(randrange(80))}'

    return check_args(args)
def check_args(args):
    """
        Check validity of the input arguments.

    Creates the results directory ``args.res_dir`` if it does not exist,
    converts every entry of ``args.Theta`` to ``float`` (in place on the
    namespace) and stores the settings by writing them, one
    ``name : value`` pair per line, to a file named `args.txt` in the
    `res_dir` directory.

    :param args: (argparse.Namespace) The command-line arguments.
    :return: args: (argparse.Namespace) The validated input arguments (the
        same object that was passed in).
    :raises ValueError: if an entry of ``args.Theta`` is a string that is not
        a valid float.
    """
    # -- create results directory (exist_ok avoids a check-then-create race)
    os.makedirs(args.res_dir, exist_ok=True)

    # -- turn thetas into floats
    # Values recorded by the author (theta_0, theta_9, theta_2):
    #   FashionMNIST F_Oja test: 0.005  , -0.00 , 0.0007
    #   FashionMNIST F0 test:    0.0018 , -0.00 , 0.
    #   FashionMNIST F_BP test:  0.0119 , -0.00 , 0.
    #   MNIST F_BP test:         0.0119 , -0.00 , 0.
    #   MNIST F_0 test:          0.0029 , -0.00 , 0.
    #   MNIST F_eHebb test:      (not recorded)
    #   MNIST F_Oja test:        (not recorded)
    #   MNIST F_bio test:        0.011  , -0.05 , -0.005
    #   MNIST DFA F0 test:       0.0021 , -0.00 , 0.
    #   F_FA:                    0.08   , -0.00 , 0.
    #   F_Oja:                   0.01   , 0     , -0.0040
    args.Theta = [float(item) for item in args.Theta]

    # -- store settings (written after the conversion, so Theta is logged as floats)
    with open(f'{args.res_dir}/args.txt', 'w') as fp:
        for key, value in vars(args).items():
            fp.write(f'{key} : {value}\n')

    return args
def main():
    """
        Main function for training classifier.

    Entry point of the script. It

    1) parses and validates the command-line arguments via
       :func:`parse_args`,
    2) builds a :class:`Train` object from them and runs its training.

    :return: None
    """
    # -- load arguments, then train
    trainer = Train(parse_args())
    trainer.train()
# -- run the training only when this file is executed as a script
if __name__ == "__main__":
    main()