Skip to content

Commit 0e040b9

Browse files
committed
Strong eval
1 parent cf2ee46 commit 0e040b9

2 files changed

Lines changed: 38 additions & 31 deletions

File tree

config/robocode.ini

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[base]
22
env_name = robocode
3-
checkpoint_interval = 250
3+
checkpoint_interval = 100
44

55
[vec]
6-
total_agents = 8192
6+
total_agents = 512
77
num_buffers = 8
88
num_threads = 8
99
num_frozen_banks = 1
@@ -27,41 +27,41 @@ width = 800
2727
height = 600
2828
reward_damage = 0.0
2929
reward_spot = 0.0
30-
dr = 0.5
30+
dr = 0.181647854498119
3131
bot_policy = 1
3232
max_ticks = 3000
3333

3434
[policy]
35-
hidden_size = 256
35+
hidden_size = 512
3636
num_layers = 2
3737
expansion_factor = 1
3838

3939
[train]
4040
gpus = 1
4141
seed = 42
42-
total_timesteps = 25_000_000_000
43-
learning_rate = 0.000151026
42+
total_timesteps = 1_000_000_000
43+
learning_rate = 0.0002253286413949516
4444
anneal_lr = 0
4545
min_lr_ratio = 0
46-
gamma = 0.944963
47-
gae_lambda = 0.976712
48-
replay_ratio = 0.32871
49-
clip_coef = 0.989056
50-
vf_coef = 5
51-
vf_clip_coef = 0.01
52-
max_grad_norm = 3.07396
46+
gamma = 0.9998480946489037
47+
gae_lambda = 0.9786838910227905
48+
replay_ratio = 0.6875892151812739
49+
clip_coef = 0.27868997013647245
50+
vf_coef = 2.7494762521624527
51+
vf_clip_coef = 0.1561600948481602
52+
max_grad_norm = 5.0
5353
ent_coef = 1e-05
5454
anneal_ent_coef = 0
5555
min_ent_coef_ratio = 0.1
56-
beta1 = 0.99738
57-
beta2 = 0.998954
58-
eps = 1.95855e-12
59-
minibatch_size = 8192
60-
horizon = 64
61-
vtrace_rho_clip = 1.51439
62-
vtrace_c_clip = 3.89253
63-
prio_alpha = 0.153154
64-
prio_beta0 = 0.923301
56+
beta1 = 0.9895985201801225
57+
beta2 = 0.9993748822217898
58+
eps = 8.79216857698271e-12
59+
minibatch_size = 32768
60+
horizon = 1024
61+
vtrace_rho_clip = 5.0
62+
vtrace_c_clip = 5.0
63+
prio_alpha = 0.5123292311684591
64+
prio_beta0 = 1.0
6565

6666
[sweep]
6767
match_enemy_model_path = 'resources/robocode/best_robo.bin'
@@ -85,16 +85,23 @@ max = 1e9
8585
mean = 5e8
8686
scale = auto
8787

88-
[sweep.env.reward_damage]
89-
distribution = uniform
90-
min = 0.0
91-
max = 0.1
92-
mean = 0.01
93-
scale = auto
88+
#[sweep.env.reward_damage]
89+
#distribution = uniform
90+
#min = 0.0
91+
#max = 0.1
92+
#mean = 0.01
93+
#scale = auto
94+
95+
#[sweep.env.reward_spot]
96+
#distribution = uniform
97+
#min = 0.0
98+
#max = 0.01
99+
#mean = 0.001
100+
#scale = auto
94101

95-
[sweep.env.reward_spot]
102+
[sweep.env.dr]
96103
distribution = uniform
97104
min = 0.0
98-
max = 0.01
99-
mean = 0.001
105+
max = 0.6
106+
mean = 0.3
100107
scale = auto

robocode.bin

4.56 MB
Binary file not shown.

0 commit comments

Comments
 (0)