File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,6 @@ def __init__(
76  76    betas=(0.9, 0.999),
77  77    eps=1e-8,
78  78    weight_decay=0,
79      - amsgrad=False,
80      - optim_bits=32,
81  79    args=None,
82  80    min_8bit_size=4096,
83  81    percentile_clipping=100,
@@ -98,10 +96,6 @@ def __init__(
98   96       The epsilon value prevents division by zero in the optimizer.
99   97   weight_decay (`float`, defaults to 0.0):
100  98       The weight decay value for the optimizer.
101     - amsgrad (`bool`, defaults to `False`):
102     -     Whether to use the [AMSGrad](https://hf.co/papers/1904.09237) variant of Adam that uses the maximum of past squared gradients instead.
103     - optim_bits (`int`, defaults to 32):
104     -     The number of bits of the optimizer state.
105  99   args (`object`, defaults to `None`):
106  100      An object with additional arguments.
107  101  min_8bit_size (`int`, defaults to 4096):
You can’t perform that action at this time.
0 commit comments