11[base]
22env_name = robocode
3- checkpoint_interval = 1000
3+ checkpoint_interval = 250
44
55[vec]
6- total_agents = 4096
6+ total_agents = 8192
77num_buffers = 8
88num_threads = 8
99num_frozen_banks = 1
1010frozen_bank_pct = 0.1
1111
1212[selfplay]
1313enabled = 1
14- max_size = 50
14+ max_size = 100
1515swap_winrate = 0.8
1616min_games = 2048
1717elo_init = 0
1818elo_k = 16
1919seed = 42
20- snapshot_interval = 100000000
21- opp_timeout_steps = 50000000
20+ snapshot_interval = 200000000
21+ opp_timeout_steps = 100000000
2222
2323[env]
2424num_agents = 2
2525num_bots = 0
2626width = 800
2727height = 600
28- reward_damage = 0.021068624865948224
29- reward_spot = 0.004051935227131666
30- dr = 0.0
28+ reward_damage = 0.1
29+ reward_spot = 9.61754e-06
30+ dr = 0.5
3131bot_policy = 1
3232max_ticks = 3000
3333
3434[policy]
35- hidden_size = 64
36- num_layers = 7.507560357123035
35+ hidden_size = 256
36+ num_layers = 2
3737expansion_factor = 1
3838
3939[train]
4040gpus = 1
4141seed = 42
42- total_timesteps = 1029041578.0
43- learning_rate = 0.0026942930808368315
42+ total_timesteps = 10_000_000_000
43+ learning_rate = 0.000151026
4444anneal_lr = 0
4545min_lr_ratio = 0
46- gamma = 0.9995099309223731
47- gae_lambda = 0.9564827193729608
48- replay_ratio = 3.2511695993037666
49- clip_coef = 0.4579889109958539
50- vf_coef = 2.1167609420658073
46+ gamma = 0.944963
47+ gae_lambda = 0.976712
48+ replay_ratio = 0.32871
49+ clip_coef = 0.989056
50+ vf_coef = 5
5151vf_clip_coef = 0.01
52- max_grad_norm = 0.1
53- ent_coef = 0.00019367286414309862
52+ max_grad_norm = 3.07396
53+ ent_coef = 1e-05
5454anneal_ent_coef = 0
5555min_ent_coef_ratio = 0.1
56- beta1 = 0.9823321577322727
57- beta2 = 0.9986847318526075
58- eps = 3.246193108635111e-08
59- minibatch_size = 65536
60- horizon = 128
61- vtrace_rho_clip = 4.442952029772186
62- vtrace_c_clip = 2.61275207877082
63- prio_alpha = 0.6684066423373816
64- prio_beta0 = 0.26728580880720837
56+ beta1 = 0.99738
57+ beta2 = 0.998954
58+ eps = 1.95855e-12
59+ minibatch_size = 8192
60+ horizon = 64
61+ vtrace_rho_clip = 1.51439
62+ vtrace_c_clip = 3.89253
63+ prio_alpha = 0.153154
64+ prio_beta0 = 0.923301
6565
6666[sweep]
67- gpus = 8
68- max_trial_seconds = 0
69- match_enemy_model_path = ' '
67+ match_enemy_model_path = ' resources/robocode/best_robo.bin'
7068match_num_games = 4096
7169match_max_ticks = 4096
7270match_enemy_hidden_size = 1024
7371match_enemy_num_layers = 2.69591
7472bot_eval = True
7573bot_eval_episodes = 32768
7674bot_eval_envs = 4096
77- bot_eval_burnin_episodes = 0
78- bot_eval_policy = 1
75+ bot_eval_burnin_episodes = 4096
76+ bot_eval_policy = - 1
7977bot_eval_max_ticks = 0
8078downsample = 1
8179metric = bot_perf
82- metric_distribution = linear
83- goal = maximize
84- max_suggestion_cost = 3600
85- max_runs = 1200
86- use_gpu = True
87- prune_pareto = True
88- early_stop_quantile = 0.3
8980
9081[sweep.train.total_timesteps]
9182distribution = log_normal
9283min = 1e8
93- max = 1e11
94- mean = 1e9
84+ max = 1e9
85+ mean = 5e8
9586scale = auto
9687
9788[sweep.env.reward_damage]
@@ -107,4 +98,3 @@ min = 0.0
10798max = 0.01
10899mean = 0.001
109100scale = auto
110-
0 commit comments