|
3 | 3 | {"train/step": 250, "train/loss": 0.133676677942276} |
4 | 4 | {"train/step": 500, "train/loss": 0.10870859771966934} |
5 | 5 | {"train/param_count": 462209} |
| 6 | +{"train/param_count": 462209} |
| 7 | +{"train/param_count": 29121} |
| 8 | +{"train/step": 270, "train/loss": 0.541170060634613} |
| 9 | +{"train/step": 280, "train/loss": 0.5225092768669128} |
| 10 | +{"train/step": 290, "train/loss": 0.5260707139968872} |
| 11 | +{"train/step": 300, "train/loss": 0.37261825799942017} |
| 12 | +{"train/step": 310, "train/loss": 0.33682766556739807} |
| 13 | +{"train/step": 320, "train/loss": 0.2880370616912842} |
| 14 | +{"train/step": 330, "train/loss": 0.31903398036956787} |
| 15 | +{"train/step": 340, "train/loss": 0.2813790440559387} |
| 16 | +{"train/step": 350, "train/loss": 0.2924691140651703} |
| 17 | +{"train/step": 360, "train/loss": 0.23942498862743378} |
| 18 | +{"train/step": 370, "train/loss": 0.2546539902687073} |
| 19 | +{"train/step": 380, "train/loss": 0.24907346069812775} |
| 20 | +{"train/step": 390, "train/loss": 0.27687424421310425} |
| 21 | +{"train/step": 400, "train/loss": 0.24805119633674622} |
| 22 | +{"train/step": 410, "train/loss": 0.2345900982618332} |
| 23 | +{"train/step": 420, "train/loss": 0.23793001472949982} |
| 24 | +{"train/step": 430, "train/loss": 0.22438864409923553} |
| 25 | +{"train/step": 440, "train/loss": 0.23558945953845978} |
| 26 | +{"train/step": 450, "train/loss": 0.23294863104820251} |
| 27 | +{"train/step": 460, "train/loss": 0.23077797889709473} |
| 28 | +{"train/step": 470, "train/loss": 0.2180887907743454} |
| 29 | +{"train/step": 480, "train/loss": 0.24870410561561584} |
| 30 | +{"train/step": 490, "train/loss": 0.21754756569862366} |
| 31 | +{"train/step": 500, "train/loss": 0.22995784878730774} |
| 32 | +{"train/step": 510, "train/loss": 0.22937336564064026} |
| 33 | +{"train/step": 520, "train/loss": 0.2326231300830841} |
| 34 | +{"train/step": 530, "train/loss": 0.21191641688346863} |
| 35 | +{"train/step": 250, "train/loss": 0.133676677942276} |
| 36 | +{"train/param_count": 29121} |
| 37 | +{"train/step": 270, "train/loss": 0.541170060634613} |
| 38 | +{"train/step": 280, "train/loss": 0.5225092768669128} |
| 39 | +{"train/step": 290, "train/loss": 0.5260707139968872} |
| 40 | +{"train/step": 300, "train/loss": 0.37261825799942017} |
| 41 | +{"train/step": 310, "train/loss": 0.33682766556739807} |
| 42 | +{"train/step": 320, "train/loss": 0.2880370616912842} |
| 43 | +{"train/step": 330, "train/loss": 0.31903398036956787} |
| 44 | +{"train/step": 340, "train/loss": 0.2813790440559387} |
| 45 | +{"train/step": 350, "train/loss": 0.2924691140651703} |
| 46 | +{"train/step": 360, "train/loss": 0.23942498862743378} |
| 47 | +{"train/step": 370, "train/loss": 0.2546539902687073} |
| 48 | +{"train/step": 380, "train/loss": 0.24907346069812775} |
| 49 | +{"train/step": 390, "train/loss": 0.27687424421310425} |
| 50 | +{"train/step": 400, "train/loss": 0.24805119633674622} |
| 51 | +{"train/step": 410, "train/loss": 0.2345900982618332} |
| 52 | +{"train/step": 420, "train/loss": 0.23793001472949982} |
| 53 | +{"train/step": 430, "train/loss": 0.22438864409923553} |
| 54 | +{"train/step": 440, "train/loss": 0.23558945953845978} |
| 55 | +{"train/step": 450, "train/loss": 0.23294863104820251} |
| 56 | +{"train/step": 460, "train/loss": 0.23077797889709473} |
| 57 | +{"train/step": 470, "train/loss": 0.2180887907743454} |
| 58 | +{"train/step": 480, "train/loss": 0.24870410561561584} |
| 59 | +{"train/step": 490, "train/loss": 0.21754756569862366} |
| 60 | +{"train/step": 500, "train/loss": 0.22995784878730774} |
| 61 | +{"train/step": 510, "train/loss": 0.22937336564064026} |
| 62 | +{"train/step": 520, "train/loss": 0.2326231300830841} |
| 63 | +{"train/step": 530, "train/loss": 0.21191641688346863} |
| 64 | +{"train/step": 500, "train/loss": 0.10870859771966934} |
0 commit comments