@@ -24,6 +24,7 @@ typedef struct {
2424 float episode_length ; // Recommended metric: number of steps of agent episode
2525 // Any extra fields you add here may be exported to Python in binding.c
2626 float on_targets ; // Fraction of targets currently boxed (0 if there are no targets)
27+ float puzzle_ticks ; // Steps spent on the final puzzle of the episode
2728 float n ; // Required as the last field
2829} Log ;
2930
@@ -48,6 +49,7 @@ typedef struct {
4849 int size ;
4950 int num_agents ;
5051 int tick ;
52+ int puzzle_tick ;
5153 int max_steps ;
5254 int agent_x ;
5355 int agent_y ;
@@ -145,6 +147,7 @@ void init (Boxoban* env) {
145147void add_log (Boxoban * env ) {
146148 float perf ;
147149 float score ;
150+ float targets_hit = 0.0f ;
148151 if (env -> curriculum_mode ) {
149152 score = 0.0f ;
150153 if (env -> largest_solved_difficulty >= 0 ) {
@@ -155,11 +158,15 @@ void add_log(Boxoban* env) {
155158 perf = (env -> win == 1 ) ? 1.0f : 0.0f ;
156159 score = perf ;
157160 }
161+ if (env -> n_targets > 0 ) {
162+ targets_hit = (float )env -> on_target / (float )env -> n_targets ;
163+ }
158164 env -> log .perf += perf ;
159165 env -> log .score += score ;
160166 env -> log .episode_length += env -> tick ;
161167 env -> log .episode_return += env -> episode_return ;
162- env -> log .on_targets += env -> on_target ;
168+ env -> log .on_targets += targets_hit ;
169+ env -> log .puzzle_ticks += env -> puzzle_tick ;
163170 env -> log .n ++ ;
164171}
165172
@@ -192,11 +199,13 @@ static void load_random_puzzle(Boxoban* env) {
192199
193200 memcpy (env -> intermediate_rewards ,
194201 env -> observations + TARGET * env -> size * env -> size ,env -> size * env -> size );
202+ env -> puzzle_tick = 0 ;
195203}
196204
197205// Required function
198206void c_reset (Boxoban * env ) {
199207 env -> tick = 0 ;
208+ env -> puzzle_tick = 0 ;
200209 env -> win = 0 ;
201210 env -> episode_return = 0 ;
202211 if (env -> curriculum_mode ) {
@@ -209,6 +218,7 @@ void c_reset(Boxoban* env) {
209218
210219 if (!env -> initialized ) {
211220 env -> tick = rand_r (& env -> rng ) % env -> max_steps ;
221+ env -> puzzle_tick = env -> tick ;
212222 env -> initialized = true;
213223 }
214224}
@@ -281,6 +291,7 @@ int take_action(Boxoban* env, int action) {
281291// Required function
282292void c_step (Boxoban * env ) {
283293 env -> tick += 1 ;
294+ env -> puzzle_tick += 1 ;
284295 env -> terminals [0 ] = 0 ;
285296 env -> rewards [0 ] = 0.0 ;
286297
@@ -318,7 +329,7 @@ void c_step(Boxoban* env) {
318329 return ;
319330 }
320331
321- if (env -> tick >= env -> max_steps ) {
332+ if (env -> puzzle_tick >= env -> max_steps ) {
322333 env -> terminals [0 ] = 1 ;
323334 env -> rewards [0 ] -= 1.0 ;
324335 env -> episode_return += env -> rewards [0 ];
0 commit comments