We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 919ce6f commit 12122a1 Copy full SHA for 12122a1
1 file changed
plugins/online-data-mixing/artifacts/custom_loop_usage.py
@@ -91,7 +91,10 @@ class State:
91
accelerator.backward(loss)
92
optimizer.step()
93
optimizer.zero_grad()
94
- if step_idx % 1 == 0 and accelerator.is_main_process:
+ loss = accelerator.gather(loss).mean()
95
+ if step_idx % 1 == 0:
96
+ if torch.isnan(loss):
97
+ raise ValueError("loss is nan")
98
print(f"Step {step_idx} ||| Loss: {loss.item():.4f}")
99
state.log_history.append(
100
{"loss": loss.item() if not torch.isnan(loss) else 1e100}
0 commit comments