Skip to content

Commit f662e15

Browse files
committed
try old config
1 parent 8c5f274 commit f662e15

2 files changed

Lines changed: 33 additions & 43 deletions

File tree

config/robocode.ini

Lines changed: 33 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,97 +1,88 @@
11
[base]
22
env_name = robocode
3-
checkpoint_interval = 1000
3+
checkpoint_interval = 250
44

55
[vec]
6-
total_agents = 4096
6+
total_agents = 8192
77
num_buffers = 8
88
num_threads = 8
99
num_frozen_banks = 1
1010
frozen_bank_pct = 0.1
1111

1212
[selfplay]
1313
enabled = 1
14-
max_size = 50
14+
max_size = 100
1515
swap_winrate = 0.8
1616
min_games = 2048
1717
elo_init = 0
1818
elo_k = 16
1919
seed = 42
20-
snapshot_interval = 100000000
21-
opp_timeout_steps = 50000000
20+
snapshot_interval = 200000000
21+
opp_timeout_steps = 100000000
2222

2323
[env]
2424
num_agents = 2
2525
num_bots = 0
2626
width = 800
2727
height = 600
28-
reward_damage = 0.021068624865948224
29-
reward_spot = 0.004051935227131666
30-
dr = 0.0
28+
reward_damage = 0.1
29+
reward_spot = 9.61754e-06
30+
dr = 0.5
3131
bot_policy = 1
3232
max_ticks = 3000
3333

3434
[policy]
35-
hidden_size = 64
36-
num_layers = 7.507560357123035
35+
hidden_size = 256
36+
num_layers = 2
3737
expansion_factor = 1
3838

3939
[train]
4040
gpus = 1
4141
seed = 42
42-
total_timesteps = 1029041578.0
43-
learning_rate = 0.0026942930808368315
42+
total_timesteps = 10_000_000_000
43+
learning_rate = 0.000151026
4444
anneal_lr = 0
4545
min_lr_ratio = 0
46-
gamma = 0.9995099309223731
47-
gae_lambda = 0.9564827193729608
48-
replay_ratio = 3.2511695993037666
49-
clip_coef = 0.4579889109958539
50-
vf_coef = 2.1167609420658073
46+
gamma = 0.944963
47+
gae_lambda = 0.976712
48+
replay_ratio = 0.32871
49+
clip_coef = 0.989056
50+
vf_coef = 5
5151
vf_clip_coef = 0.01
52-
max_grad_norm = 0.1
53-
ent_coef = 0.00019367286414309862
52+
max_grad_norm = 3.07396
53+
ent_coef = 1e-05
5454
anneal_ent_coef = 0
5555
min_ent_coef_ratio = 0.1
56-
beta1 = 0.9823321577322727
57-
beta2 = 0.9986847318526075
58-
eps = 3.246193108635111e-08
59-
minibatch_size = 65536
60-
horizon = 128
61-
vtrace_rho_clip = 4.442952029772186
62-
vtrace_c_clip = 2.61275207877082
63-
prio_alpha = 0.6684066423373816
64-
prio_beta0 = 0.26728580880720837
56+
beta1 = 0.99738
57+
beta2 = 0.998954
58+
eps = 1.95855e-12
59+
minibatch_size = 8192
60+
horizon = 64
61+
vtrace_rho_clip = 1.51439
62+
vtrace_c_clip = 3.89253
63+
prio_alpha = 0.153154
64+
prio_beta0 = 0.923301
6565

6666
[sweep]
67-
gpus = 8
68-
max_trial_seconds = 0
69-
match_enemy_model_path = ''
67+
match_enemy_model_path = 'resources/robocode/best_robo.bin'
7068
match_num_games = 4096
7169
match_max_ticks = 4096
7270
match_enemy_hidden_size = 1024
7371
match_enemy_num_layers = 2.69591
7472
bot_eval = True
7573
bot_eval_episodes = 32768
7674
bot_eval_envs = 4096
77-
bot_eval_burnin_episodes = 0
78-
bot_eval_policy = 1
75+
bot_eval_burnin_episodes = 4096
76+
bot_eval_policy = -1
7977
bot_eval_max_ticks = 0
8078
downsample = 1
8179
metric = bot_perf
82-
metric_distribution = linear
83-
goal = maximize
84-
max_suggestion_cost = 3600
85-
max_runs = 1200
86-
use_gpu = True
87-
prune_pareto = True
88-
early_stop_quantile = 0.3
8980

9081
[sweep.train.total_timesteps]
9182
distribution = log_normal
9283
min = 1e8
93-
max = 1e11
94-
mean = 1e9
84+
max = 1e9
85+
mean = 5e8
9586
scale = auto
9687

9788
[sweep.env.reward_damage]
@@ -107,4 +98,3 @@ min = 0.0
10798
max = 0.01
10899
mean = 0.001
109100
scale = auto
110-

robocode.bin

1.22 MB
Binary file not shown.

0 commit comments

Comments (0)