|
45 | 45 | class Net(nn.Module): |
46 | 46 |
|
47 | 47 | def __init__(self): |
48 | | - super(Net, self).__init__() |
| 48 | + super().__init__() |
49 | 49 | # 1 input image channel, 6 output channels, 5x5 square convolution |
50 | 50 | # kernel |
51 | 51 | self.conv1 = nn.Conv2d(1, 6, 5) |
52 | 52 | self.conv2 = nn.Conv2d(6, 16, 5) |
53 | 53 | # an affine operation: y = Wx + b |
54 | | - self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5*5 from image dimension |
| 54 | + self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5*5 from image dimension |
55 | 55 | self.fc2 = nn.Linear(120, 84) |
56 | 56 | self.fc3 = nn.Linear(84, 10) |
57 | 57 |
|
@@ -205,7 +205,9 @@ def forward(self, input): |
205 | 205 | # |
206 | 206 | # |
207 | 207 | # Now we shall call ``loss.backward()``, and have a look at conv1's bias |
208 | | -# gradients before and after the backward. |
| 208 | +# gradients before and after the backward pass. Since we have not introduced an |
| 209 | +# optimizer yet, we clear the gradients directly on the model. Once you are |
| 210 | +# using an optimizer, prefer ``optimizer.zero_grad()`` as shown below. |
209 | 211 |
|
210 | 212 |
|
211 | 213 | net.zero_grad() # zeroes the gradient buffers of all parameters |
@@ -246,7 +248,8 @@ def forward(self, input): |
246 | 248 | # |
247 | 249 | # learning_rate = 0.01 |
248 | 250 | # for f in net.parameters(): |
249 | | -# f.data.sub_(f.grad.data * learning_rate) |
| 251 | +# with torch.no_grad(): |
| 252 | +# f -= f.grad * learning_rate |
250 | 253 | # |
251 | 254 | # However, as you use neural networks, you want to use various different |
252 | 255 | # update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc. |
|
0 commit comments