
Commit b04a695

chore: update ChaosGrad default lr to 1e-4 across implementation, docs, and tests
1 parent 4fea20c · commit b04a695

5 files changed · 15 additions & 15 deletions


CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -203,7 +203,7 @@ trainer = OdyssNetTrainer(model, lr=3e-4)

 # ChaosGrad: optional zero-hyperparameter optimizer (pass as custom optimizer)
 from odyssnet import ChaosGrad
-opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-3)
+opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-4)
 trainer = OdyssNetTrainer(model, optimizer=opt)
 ```

docs/LIBRARY.md

Lines changed: 2 additions & 2 deletions
@@ -223,7 +223,7 @@ trainer = OdyssNetTrainer(model, optimizer=torch.optim.AdamW(model.parameters(),

 # ChaosGrad — zero-hyperparameter optimizer (optional, see ChaosGrad section below)
 from odyssnet import ChaosGrad
-opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-3)
+opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-4)
 trainer = OdyssNetTrainer(model, optimizer=opt)
 ```

@@ -512,7 +512,7 @@ from odyssnet import OdyssNet, OdyssNetTrainer, ChaosGrad
 model = OdyssNet(num_neurons=32, input_ids=[0], output_ids=[31], device='cuda')

 # Classify parameters for group-specific meta-adaptation
-opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-3)
+opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-4)
 trainer = OdyssNetTrainer(model, optimizer=opt, device='cuda')

 for epoch in range(100):

odyssnet/training/chaos_optimizer.py

Lines changed: 4 additions & 4 deletions
@@ -9,7 +9,7 @@
 from odyssnet.training.chaos_optimizer import ChaosGrad

 model = OdyssNet(num_neurons=32, input_ids=[0], output_ids=[31])
-opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-3)
+opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-4)
 trainer = OdyssNetTrainer(model, optimizer=opt)

 Algorithm (v3 improvements over the removed v2.2):
@@ -38,14 +38,14 @@ class ChaosGrad(torch.optim.Optimizer):
     - per_param_alpha : gradient-centralization gate

     The single user-facing parameter ``lr`` (genesis learning rate,
-    default 1e-3) is a mathematical starting point, not a dial to tune.
+    default 1e-4) is a mathematical starting point, not a dial to tune.

     Args:
         params: Iterable of parameters **or** a list of classified param-group
             dicts returned by :meth:`classify_params`. Providing classified
             groups enables group-specific decay seeding, per-group beta
             equilibria, and the Hebbian bypass rule.
-        lr (float): Genesis learning rate. Default: ``1e-3``.
+        lr (float): Genesis learning rate. Default: ``1e-4``.
     """

     # ------------------------------------------------------------------ #
@@ -133,7 +133,7 @@ class ChaosGrad(torch.optim.Optimizer):
     # Construction                                                       #
     # ------------------------------------------------------------------ #

-    def __init__(self, params, lr: float = 1e-3) -> None:
+    def __init__(self, params, lr: float = 1e-4) -> None:
         if lr <= 0:
             raise ValueError(f"Genesis learning rate must be > 0, got {lr}")
         defaults = dict(

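For context on what the new default actually buys, here is a minimal usage sketch of the constructor paths described in the docstring above. It assumes only the OdyssNet/ChaosGrad API already visible in this diff; it is not taken verbatim from the repository.

    # Minimal sketch, assuming the API shown in the diff above.
    from odyssnet import OdyssNet, OdyssNetTrainer
    from odyssnet.training.chaos_optimizer import ChaosGrad

    model = OdyssNet(num_neurons=32, input_ids=[0], output_ids=[31])

    # Preferred path: classified param groups enable group-specific decay seeding,
    # per-group beta equilibria, and the Hebbian bypass rule.
    opt = ChaosGrad(ChaosGrad.classify_params(model))  # lr now defaults to 1e-4

    # Also accepted: a plain parameter iterable (lightweight treatment, no bypass rule).
    opt_plain = ChaosGrad(model.parameters(), lr=1e-4)

    trainer = OdyssNetTrainer(model, optimizer=opt)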
tests/training/test_chaos_optimizer.py

Lines changed: 3 additions & 3 deletions
@@ -42,7 +42,7 @@ def _model(n=8, in_ids=None, out_ids=None, **kwargs):
                     device="cpu", **kwargs)


-def _opt(model, lr=1e-3):
+def _opt(model, lr=1e-4):
     return ChaosGrad(ChaosGrad.classify_params(model), lr=lr)


@@ -148,7 +148,7 @@ def test_gates_beta_equil_is_0_85(self):

     def test_plain_params_accepted(self):
         m = _model()
-        opt = ChaosGrad(m.parameters(), lr=1e-3)
+        opt = ChaosGrad(m.parameters(), lr=1e-4)
         assert opt is not None


@@ -183,7 +183,7 @@ def test_W_diagonal_stays_zero(self):

     def test_sparse_gradient_raises(self):
         embed = torch.nn.Embedding(10, 4, sparse=True)
-        opt = ChaosGrad(embed.parameters(), lr=1e-3)
+        opt = ChaosGrad(embed.parameters(), lr=1e-4)
         idx = torch.tensor([0, 2])
         out = embed(idx).sum()
         out.backward()

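The test_sparse_gradient_raises change above exercises ChaosGrad's rejection of sparse gradients. A hedged reproduction sketch follows; the exact exception type is not visible in the diff, so the except clause below is deliberately broad.

    import torch
    from odyssnet.training.chaos_optimizer import ChaosGrad

    embed = torch.nn.Embedding(10, 4, sparse=True)   # sparse=True yields sparse gradients
    opt = ChaosGrad(embed.parameters(), lr=1e-4)

    out = embed(torch.tensor([0, 2])).sum()
    out.backward()                                   # embed.weight.grad is now a sparse tensor

    try:
        opt.step()                                   # expected to raise, per test_sparse_gradient_raises
    except Exception as err:                         # exception type is an assumption
        print(f"sparse gradient rejected: {err!r}")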
tests/training/test_chaos_optimizer_extra.py

Lines changed: 5 additions & 5 deletions
@@ -29,7 +29,7 @@ def _model(n=8, **kwargs):
                     device='cpu', **kwargs)


-def _opt(model, lr=1e-3):
+def _opt(model, lr=1e-4):
     return ChaosGrad(ChaosGrad.classify_params(model), lr=lr)


@@ -126,11 +126,11 @@ def test_lr_group_override_propagates(self):
         from group, so the new value must take effect on the next step.
         """
         m = _model()
-        opt = _opt(m, lr=1e-3)
+        opt = _opt(m, lr=1e-4)
         _one_step_raw(opt, m)

         for pg in opt.param_groups:
-            pg['lr'] = 5e-4
+            pg['lr'] = 5e-5

         W_before = m.W.data.clone()
         _one_step_raw(opt, m)
@@ -279,15 +279,15 @@ def test_gradient_checkpointing_compatible(self):
     def test_plain_params_no_crash(self):
         """ChaosGrad with plain model.parameters() (no classify_params) must train."""
         m = _model()
-        opt = ChaosGrad(m.parameters(), lr=1e-3)
+        opt = ChaosGrad(m.parameters(), lr=1e-4)
         t = OdyssNetTrainer(m, optimizer=opt)
         loss = _step(t, n=3)
         assert math.isfinite(loss)

     def test_plain_params_hebbian_no_bypass(self):
         """Without classify_params, hebb params get lightweight treatment (no crash)."""
         m = _model(hebb_type='global')
-        opt = ChaosGrad(m.parameters(), lr=1e-3)
+        opt = ChaosGrad(m.parameters(), lr=1e-4)
         t = OdyssNetTrainer(m, optimizer=opt)
         loss = _step(t, n=5)
         assert math.isfinite(loss)

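test_lr_group_override_propagates documents a useful property: ChaosGrad re-reads lr from each param group on every step, so rewriting pg['lr'] takes effect on the very next step. A small sketch of that mechanism follows; the helper name and decay factor are illustrative, not part of the library, and lr is intended as a genesis value rather than a dial to tune.

    from odyssnet import OdyssNet, OdyssNetTrainer
    from odyssnet.training.chaos_optimizer import ChaosGrad

    model = OdyssNet(num_neurons=32, input_ids=[0], output_ids=[31])
    opt = ChaosGrad(ChaosGrad.classify_params(model), lr=1e-4)
    trainer = OdyssNetTrainer(model, optimizer=opt)

    def scale_genesis_lr(optimizer, factor):
        """Hypothetical helper: rescale lr in-place; ChaosGrad picks it up on its next step."""
        for pg in optimizer.param_groups:
            pg['lr'] = pg['lr'] * factor

    scale_genesis_lr(opt, 0.5)   # 1e-4 -> 5e-5, mirroring the override exercised by the test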
0 commit comments
