Skip to content

Commit ed16ec6

Browse files
author
Tim-phant
committed
Track Boxoban per-puzzle timeout and final puzzle tick
1 parent 0fdb8e3 commit ed16ec6

2 files changed

Lines changed: 14 additions & 2 deletions

File tree

ocean/boxoban/binding.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,5 @@ void my_log(Log* log, Dict* out) {
3030
dict_set(out, "episode_return", log->episode_return);
3131
dict_set(out, "episode_length", log->episode_length);
3232
dict_set(out, "targets_hit", log->on_targets);
33+
dict_set(out, "final_puzzle_tick", log->puzzle_ticks);
3334
}

ocean/boxoban/boxoban.h

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ typedef struct {
2424
float episode_length; // Recommended metric: number of steps of agent episode
2525
// Any extra fields you add here may be exported to Python in binding.c
2626
float on_targets; // Number of targets currently boxed
27+
float puzzle_ticks; // Steps spent on the final puzzle of the episode
2728
float n; // Required as the last field
2829
} Log;
2930

@@ -48,6 +49,7 @@ typedef struct {
4849
int size;
4950
int num_agents;
5051
int tick;
52+
int puzzle_tick;
5153
int max_steps;
5254
int agent_x;
5355
int agent_y;
@@ -145,6 +147,7 @@ void init (Boxoban* env) {
145147
void add_log(Boxoban* env) {
146148
float perf;
147149
float score;
150+
float targets_hit = 0.0f;
148151
if (env->curriculum_mode) {
149152
score = 0.0f;
150153
if (env->largest_solved_difficulty >= 0) {
@@ -155,11 +158,15 @@ void add_log(Boxoban* env) {
155158
perf = (env->win == 1) ? 1.0f : 0.0f;
156159
score = perf;
157160
}
161+
if (env->n_targets > 0) {
162+
targets_hit = (float)env->on_target / (float)env->n_targets;
163+
}
158164
env->log.perf += perf;
159165
env->log.score += score;
160166
env->log.episode_length += env->tick;
161167
env->log.episode_return += env->episode_return;
162-
env->log.on_targets += env->on_target;
168+
env->log.on_targets += targets_hit;
169+
env->log.puzzle_ticks += env->puzzle_tick;
163170
env->log.n++;
164171
}
165172

@@ -192,11 +199,13 @@ static void load_random_puzzle(Boxoban* env) {
192199

193200
memcpy(env->intermediate_rewards,
194201
env->observations + TARGET * env->size * env->size,env->size * env->size);
202+
env->puzzle_tick = 0;
195203
}
196204

197205
// Required function
198206
void c_reset(Boxoban* env) {
199207
env->tick = 0;
208+
env->puzzle_tick = 0;
200209
env->win = 0;
201210
env->episode_return = 0;
202211
if (env->curriculum_mode) {
@@ -209,6 +218,7 @@ void c_reset(Boxoban* env) {
209218

210219
if (!env->initialized) {
211220
env->tick = rand_r(&env->rng) % env->max_steps;
221+
env->puzzle_tick = env->tick;
212222
env->initialized = true;
213223
}
214224
}
@@ -281,6 +291,7 @@ int take_action(Boxoban* env, int action) {
281291
// Required function
282292
void c_step(Boxoban* env) {
283293
env->tick += 1;
294+
env->puzzle_tick += 1;
284295
env->terminals[0] = 0;
285296
env->rewards[0] = 0.0;
286297

@@ -318,7 +329,7 @@ void c_step(Boxoban* env) {
318329
return;
319330
}
320331

321-
if (env->tick >= env->max_steps) {
332+
if (env->puzzle_tick >= env->max_steps) {
322333
env->terminals[0] = 1;
323334
env->rewards[0] -= 1.0;
324335
env->episode_return += env->rewards[0];

0 commit comments

Comments
 (0)