File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 11[base]
22env_name = robocode
3- checkpoint_interval = 250
3+ checkpoint_interval = 100
44
55[vec]
6- total_agents = 8192
6+ total_agents = 512
77num_buffers = 8
88num_threads = 8
99num_frozen_banks = 1
@@ -27,41 +27,41 @@ width = 800
2727height = 600
2828reward_damage = 0.0
2929reward_spot = 0.0
30- dr = 0.5
30+ dr = 0.181647854498119
3131bot_policy = 1
3232max_ticks = 3000
3333
3434[policy]
35- hidden_size = 256
35+ hidden_size = 512
3636num_layers = 2
3737expansion_factor = 1
3838
3939[train]
4040gpus = 1
4141seed = 42
42- total_timesteps = 25_000_000_000
43- learning_rate = 0.000151026
42+ total_timesteps = 1_000_000_000
43+ learning_rate = 0.0002253286413949516
4444anneal_lr = 0
4545min_lr_ratio = 0
46- gamma = 0.944963
47- gae_lambda = 0.976712
48- replay_ratio = 0.32871
49- clip_coef = 0.989056
50- vf_coef = 5
51- vf_clip_coef = 0.01
52- max_grad_norm = 3.07396
46+ gamma = 0.9998480946489037
47+ gae_lambda = 0.9786838910227905
48+ replay_ratio = 0.6875892151812739
49+ clip_coef = 0.27868997013647245
50+ vf_coef = 2.7494762521624527
51+ vf_clip_coef = 0.1561600948481602
52+ max_grad_norm = 5.0
5353ent_coef = 1e-05
5454anneal_ent_coef = 0
5555min_ent_coef_ratio = 0.1
56- beta1 = 0.99738
57- beta2 = 0.998954
58- eps = 1.95855e-12
59- minibatch_size = 8192
60- horizon = 64
61- vtrace_rho_clip = 1.51439
62- vtrace_c_clip = 3.89253
63- prio_alpha = 0.153154
64- prio_beta0 = 0.923301
56+ beta1 = 0.9895985201801225
57+ beta2 = 0.9993748822217898
58+ eps = 8.79216857698271e-12
59+ minibatch_size = 32768
60+ horizon = 1024
61+ vtrace_rho_clip = 5.0
62+ vtrace_c_clip = 5.0
63+ prio_alpha = 0.5123292311684591
64+ prio_beta0 = 1.0
6565
6666[sweep]
6767match_enemy_model_path = 'resources/robocode/best_robo.bin'
@@ -85,16 +85,23 @@ max = 1e9
8585mean = 5e8
8686scale = auto
8787
88- [sweep.env.reward_damage]
89- distribution = uniform
90- min = 0.0
91- max = 0.1
92- mean = 0.01
93- scale = auto
88+ # [sweep.env.reward_damage]
89+ # distribution = uniform
90+ # min = 0.0
91+ # max = 0.1
92+ # mean = 0.01
93+ # scale = auto
94+
95+ # [sweep.env.reward_spot]
96+ # distribution = uniform
97+ # min = 0.0
98+ # max = 0.01
99+ # mean = 0.001
100+ # scale = auto
94101
95- [sweep.env.reward_spot]
102+ [sweep.env.dr]
96103distribution = uniform
97104min = 0.0
98- max = 0.01
99- mean = 0.001
105+ max = 0.6
106+ mean = 0.3
100107scale = auto
You can’t perform that action at this time.
0 commit comments