@@ -464,8 +464,12 @@ model = torch.nn.Sequential(
464464 ).to(device)
465465
466466# The nn package also contains definitions of popular loss functions; in this
467- # case we will use Mean Squared Error (MSE) as our loss function.
468- loss_fn = torch.nn.MSELoss(size_average = False )
467+ # case we will use Mean Squared Error (MSE) as our loss function. Setting
468+ # reduction='sum' means that we are computing the *sum* of squared errors rather
469+ # than the mean; this is for consistency with the examples above where we
470+ # manually compute the loss, but in practice it is more common to use mean
471+ # squared error as a loss by setting reduction='elementwise_mean'.
472+ loss_fn = torch.nn.MSELoss(reduction='sum')
469473
470474learning_rate = 1e-4
471475for t in range (500 ):
@@ -528,7 +532,7 @@ model = torch.nn.Sequential(
528532 torch.nn.ReLU(),
529533 torch.nn.Linear(H, D_out),
530534 )
531- loss_fn = torch.nn.MSELoss(size_average = False )
535+ loss_fn = torch.nn.MSELoss(reduction='sum')
532536
533537# Use the optim package to define an Optimizer that will update the weights of
534538# the model for us. Here we will use Adam; the optim package contains many other
@@ -603,7 +607,7 @@ model = TwoLayerNet(D_in, H, D_out)
603607# Construct our loss function and an Optimizer. The call to model.parameters()
604608# in the SGD constructor will contain the learnable parameters of the two
605609# nn.Linear modules which are members of the model.
606- loss_fn = torch.nn.MSELoss(size_average = False )
610+ loss_fn = torch.nn.MSELoss(reduction='sum')
607611optimizer = torch.optim.SGD(model.parameters(), lr = 1e-4 )
608612for t in range (500 ):
609613 # Forward pass: Compute predicted y by passing x to the model
@@ -683,7 +687,7 @@ model = DynamicNet(D_in, H, D_out)
683687
684688# Construct our loss function and an Optimizer. Training this strange model with
685689# vanilla stochastic gradient descent is tough, so we use momentum
686- criterion = torch.nn.MSELoss(size_average = False )
690+ criterion = torch.nn.MSELoss(reduction='sum')
687691optimizer = torch.optim.SGD(model.parameters(), lr = 1e-4 , momentum = 0.9 )
688692for t in range (500 ):
689693 # Forward pass: Compute predicted y by passing x to the model
0 commit comments