From 3159b9798943f03e6ff589bd202258c2360c3739 Mon Sep 17 00:00:00 2001 From: M K Date: Wed, 10 Jun 2026 17:28:50 +0200 Subject: [PATCH 1/4] Add Flappy environment --- config/flappy.ini | 40 +++++++ ocean/flappy/binding.c | 38 ++++++ ocean/flappy/flappy.c | 46 +++++++ ocean/flappy/flappy.h | 264 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 388 insertions(+) create mode 100644 config/flappy.ini create mode 100644 ocean/flappy/binding.c create mode 100644 ocean/flappy/flappy.c create mode 100644 ocean/flappy/flappy.h diff --git a/config/flappy.ini b/config/flappy.ini new file mode 100644 index 0000000000..e48a53e570 --- /dev/null +++ b/config/flappy.ini @@ -0,0 +1,40 @@ +[base] +env_name = flappy + +[vec] +total_agents = 2048 +num_buffers = 4 +num_threads = 8 + +[policy] +hidden_size = 64 +num_layers = 2 + +[env] +width = 420 +height = 640 +max_steps = 4096 +gravity = 0.45 +flap_velocity = -7.5 +pipe_speed = 3.0 +pipe_gap = 190.0 +pipe_width = 58.0 +pipe_spacing = 220.0 +first_pipe_x = 220.0 +bird_x = 96.0 +bird_radius = 14.0 +alive_reward = 0.01 +pass_reward = 1.0 +crash_reward = -1.0 +center_reward = 0.03 + +[train] +total_timesteps = 20_000_000 +learning_rate = 0.01 +gamma = 0.995 +gae_lambda = 0.92 +ent_coef = 0.003 +vf_coef = 1.5 +minibatch_size = 16_384 +horizon = 64 + diff --git a/ocean/flappy/binding.c b/ocean/flappy/binding.c new file mode 100644 index 0000000000..96790d9695 --- /dev/null +++ b/ocean/flappy/binding.c @@ -0,0 +1,38 @@ +#include "flappy.h" + +#define OBS_SIZE FLAPPY_OBS_SIZE +#define NUM_ATNS 1 +#define ACT_SIZES {2} +#define OBS_TENSOR_T FloatTensor + +#define Env Flappy +#include "vecenv.h" + +void my_init(Env* env, Dict* kwargs) { + env->num_agents = 1; + env->width = dict_get(kwargs, "width")->value; + env->height = dict_get(kwargs, "height")->value; + env->max_steps = dict_get(kwargs, "max_steps")->value; + env->gravity = dict_get(kwargs, "gravity")->value; + env->flap_velocity = dict_get(kwargs, "flap_velocity")->value; + 
env->pipe_speed = dict_get(kwargs, "pipe_speed")->value; + env->pipe_gap = dict_get(kwargs, "pipe_gap")->value; + env->pipe_width = dict_get(kwargs, "pipe_width")->value; + env->pipe_spacing = dict_get(kwargs, "pipe_spacing")->value; + env->first_pipe_x = dict_get(kwargs, "first_pipe_x")->value; + env->bird_x = dict_get(kwargs, "bird_x")->value; + env->bird_radius = dict_get(kwargs, "bird_radius")->value; + env->alive_reward = dict_get(kwargs, "alive_reward")->value; + env->pass_reward = dict_get(kwargs, "pass_reward")->value; + env->crash_reward = dict_get(kwargs, "crash_reward")->value; + env->center_reward = dict_get(kwargs, "center_reward")->value; + init(env); +} + +void my_log(Log* log, Dict* out) { + dict_set(out, "perf", log->perf); + dict_set(out, "score", log->score); + dict_set(out, "episode_return", log->episode_return); + dict_set(out, "episode_length", log->episode_length); +} + diff --git a/ocean/flappy/flappy.c b/ocean/flappy/flappy.c new file mode 100644 index 0000000000..b7d6032927 --- /dev/null +++ b/ocean/flappy/flappy.c @@ -0,0 +1,46 @@ +#include <stdlib.h> +#include <time.h> +#include "flappy.h" + +int main(void) { + Flappy env = { + .width = 420, + .height = 640, + .max_steps = 4096, + .gravity = 0.45f, + .flap_velocity = -7.5f, + .pipe_speed = 3.0f, + .pipe_gap = 190.0f, + .pipe_width = 58.0f, + .pipe_spacing = 220.0f, + .first_pipe_x = 220.0f, + .bird_x = 96.0f, + .bird_radius = 14.0f, + .alive_reward = 0.01f, + .pass_reward = 1.0f, + .crash_reward = -1.0f, + .center_reward = 0.03f, + .rng = (unsigned int)time(NULL), + }; + + float observations[FLAPPY_OBS_SIZE] = {0}; + float actions[1] = {0}; + float rewards[1] = {0}; + float terminals[1] = {0}; + env.observations = observations; + env.actions = actions; + env.rewards = rewards; + env.terminals = terminals; + init(&env); + c_reset(&env); + + while (!WindowShouldClose()) { + env.actions[0] = IsKeyPressed(KEY_SPACE) || IsKeyPressed(KEY_UP) ? 
FLAPPY_FLAP : FLAPPY_NOOP; + c_step(&env); + c_render(&env); + } + + c_close(&env); + return 0; +} + diff --git a/ocean/flappy/flappy.h b/ocean/flappy/flappy.h new file mode 100644 index 0000000000..21f03ee522 --- /dev/null +++ b/ocean/flappy/flappy.h @@ -0,0 +1,264 @@ +#pragma once + +#include <math.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include "raylib.h" + +#define FLAPPY_OBS_SIZE 6 +#define FLAPPY_NUM_PIPES 3 + +#define FLAPPY_NOOP 0 +#define FLAPPY_FLAP 1 + +typedef struct Log Log; +struct Log { + float perf; + float score; + float episode_return; + float episode_length; + float n; +}; + +typedef struct Client Client; +struct Client { + Texture2D puffer; +}; + +typedef struct Pipe Pipe; +struct Pipe { + float x; + float gap_y; + int passed; +}; + +typedef struct Flappy Flappy; +struct Flappy { + float* observations; + float* actions; + float* rewards; + float* terminals; + Log log; + int num_agents; + unsigned int rng; + Client* client; + + int width; + int height; + int max_steps; + float gravity; + float flap_velocity; + float pipe_speed; + float pipe_gap; + float pipe_width; + float pipe_spacing; + float first_pipe_x; + float bird_x; + float bird_radius; + float alive_reward; + float pass_reward; + float crash_reward; + float center_reward; + + float bird_y; + float bird_vy; + int tick; + int score; + float episode_return; + Pipe pipes[FLAPPY_NUM_PIPES]; +}; + +static inline float flappy_randf(Flappy* env) { + return (float)rand_r(&env->rng) / (float)RAND_MAX; +} + +static inline float flappy_clampf(float v, float lo, float hi) { + return fminf(fmaxf(v, lo), hi); +} + +static inline float flappy_pipe_gap_y(Flappy* env) { + float margin = env->pipe_gap * 0.6f; + return margin + flappy_randf(env) * (env->height - 2.0f * margin); +} + +static inline void flappy_init_pipe(Flappy* env, int i, float x) { + env->pipes[i].x = x; + env->pipes[i].gap_y = flappy_pipe_gap_y(env); + env->pipes[i].passed = 0; +} + +static inline Pipe* flappy_next_pipe(Flappy* env) { + Pipe* best = 
&env->pipes[0]; + float best_dx = best->x + env->pipe_width - env->bird_x; + for (int i = 1; i < FLAPPY_NUM_PIPES; i++) { + float dx = env->pipes[i].x + env->pipe_width - env->bird_x; + if (dx >= 0.0f && (best_dx < 0.0f || dx < best_dx)) { + best = &env->pipes[i]; + best_dx = dx; + } + } + return best; +} + +static inline void flappy_compute_observations(Flappy* env) { + Pipe* pipe = flappy_next_pipe(env); + float dx = (pipe->x + env->pipe_width - env->bird_x) / env->width; + float dy = (env->bird_y - pipe->gap_y) / env->height; + + env->observations[0] = env->bird_y / env->height; + env->observations[1] = flappy_clampf(env->bird_vy / 16.0f, -1.0f, 1.0f); + env->observations[2] = flappy_clampf(dx, 0.0f, 1.5f); + env->observations[3] = pipe->gap_y / env->height; + env->observations[4] = flappy_clampf(dy, -1.0f, 1.0f); + env->observations[5] = flappy_clampf((float)env->score / 20.0f, 0.0f, 1.0f); +} + +static inline void flappy_add_log(Flappy* env) { + env->log.perf += flappy_clampf((float)env->score / 20.0f, 0.0f, 1.0f); + env->log.score += env->score; + env->log.episode_return += env->episode_return; + env->log.episode_length += env->tick; + env->log.n += 1.0f; +} + +void init(Flappy* env) { + env->num_agents = 1; + env->client = NULL; + memset(&env->log, 0, sizeof(Log)); +} + +void c_reset(Flappy* env) { + env->bird_y = env->height * 0.5f; + env->bird_vy = 0.0f; + env->tick = 0; + env->score = 0; + env->episode_return = 0.0f; + float start_x = env->first_pipe_x; + for (int i = 0; i < FLAPPY_NUM_PIPES; i++) { + flappy_init_pipe(env, i, start_x + i * env->pipe_spacing); + } + + flappy_compute_observations(env); +} + +void c_step(Flappy* env) { + env->tick += 1; + env->rewards[0] = env->alive_reward; + env->terminals[0] = 0.0f; + + int action = (int)env->actions[0]; + if (action == FLAPPY_FLAP) { + env->bird_vy = env->flap_velocity; + } + + env->bird_vy += env->gravity; + env->bird_y += env->bird_vy; + + float max_x = 0.0f; + for (int i = 0; i < FLAPPY_NUM_PIPES; 
i++) { + if (env->pipes[i].x > max_x) { + max_x = env->pipes[i].x; + } + } + + bool done = false; + if (env->bird_y - env->bird_radius < 0.0f || env->bird_y + env->bird_radius > env->height) { + done = true; + } + + for (int i = 0; i < FLAPPY_NUM_PIPES; i++) { + Pipe* pipe = &env->pipes[i]; + pipe->x -= env->pipe_speed; + + if (!pipe->passed && pipe->x + env->pipe_width < env->bird_x) { + pipe->passed = 1; + env->score += 1; + env->rewards[0] += env->pass_reward; + } + + bool overlap_x = env->bird_x + env->bird_radius > pipe->x && + env->bird_x - env->bird_radius < pipe->x + env->pipe_width; + bool outside_gap = env->bird_y - env->bird_radius < pipe->gap_y - env->pipe_gap * 0.5f || + env->bird_y + env->bird_radius > pipe->gap_y + env->pipe_gap * 0.5f; + if (overlap_x && outside_gap) { + done = true; + } + + if (pipe->x + env->pipe_width < 0.0f) { + flappy_init_pipe(env, i, max_x + env->pipe_spacing); + max_x = env->pipes[i].x; + } + } + + Pipe* next = flappy_next_pipe(env); + float center_error = fabsf(env->bird_y - next->gap_y) / (env->height * 0.5f); + env->rewards[0] += env->center_reward * (1.0f - flappy_clampf(center_error, 0.0f, 1.0f)); + + if (env->tick >= env->max_steps) { + done = true; + } + + if (done) { + env->rewards[0] += env->crash_reward; + env->terminals[0] = 1.0f; + env->episode_return += env->rewards[0]; + flappy_add_log(env); + c_reset(env); + return; + } + + env->episode_return += env->rewards[0]; + flappy_compute_observations(env); +} + +void c_render(Flappy* env) { + if (env->client == NULL) { + env->client = (Client*)calloc(1, sizeof(Client)); + InitWindow(env->width, env->height, "PufferLib Flappy"); + SetTargetFPS(60); + env->client->puffer = LoadTexture("resources/shared/puffers_128.png"); + } + + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); + + DrawRectangle(0, env->height - 24, env->width, 24, (Color){40, 120, 84, 255}); + for (int i = 0; i < FLAPPY_NUM_PIPES; i++) { + Pipe* 
pipe = &env->pipes[i]; + int gap_top = (int)(pipe->gap_y - env->pipe_gap * 0.5f); + int gap_bottom = (int)(pipe->gap_y + env->pipe_gap * 0.5f); + DrawRectangle((int)pipe->x, 0, (int)env->pipe_width, gap_top, (Color){0, 187, 187, 255}); + DrawRectangle((int)pipe->x, gap_bottom, (int)env->pipe_width, + env->height - gap_bottom, (Color){0, 187, 187, 255}); + } + + float sprite_size = env->bird_radius * 3.0f; + float rotation = flappy_clampf(env->bird_vy * 3.0f, -35.0f, 35.0f); + DrawTexturePro( + env->client->puffer, + (Rectangle){0, 0, 128, 128}, + (Rectangle){env->bird_x, env->bird_y, sprite_size, sprite_size}, + (Vector2){sprite_size * 0.5f, sprite_size * 0.5f}, + rotation, + WHITE + ); + DrawText(TextFormat("Score: %i", env->score), 12, 12, 24, (Color){241, 241, 241, 255}); + EndDrawing(); +} + +void c_close(Flappy* env) { + if (env->client != NULL) { + if (IsWindowReady()) { + UnloadTexture(env->client->puffer); + CloseWindow(); + } + free(env->client); + env->client = NULL; + } +} + From 94bcc892887fa359e3ef39dfff4a04c4bfbca9e3 Mon Sep 17 00:00:00 2001 From: M K Date: Wed, 10 Jun 2026 19:04:30 +0200 Subject: [PATCH 2/4] Update Flappy observations --- ocean/flappy/flappy.h | 49 +++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/ocean/flappy/flappy.h b/ocean/flappy/flappy.h index 21f03ee522..e5898f47ce 100644 --- a/ocean/flappy/flappy.h +++ b/ocean/flappy/flappy.h @@ -6,7 +6,7 @@ #include #include "raylib.h" -#define FLAPPY_OBS_SIZE 6 +#define FLAPPY_OBS_SIZE 7 #define FLAPPY_NUM_PIPES 3 #define FLAPPY_NOOP 0 @@ -88,30 +88,48 @@ static inline void flappy_init_pipe(Flappy* env, int i, float x) { env->pipes[i].passed = 0; } -static inline Pipe* flappy_next_pipe(Flappy* env) { - Pipe* best = &env->pipes[0]; - float best_dx = best->x + env->pipe_width - env->bird_x; - for (int i = 1; i < FLAPPY_NUM_PIPES; i++) { - float dx = env->pipes[i].x + env->pipe_width - env->bird_x; - if (dx >= 0.0f && (best_dx < 0.0f || 
dx < best_dx)) { - best = &env->pipes[i]; - best_dx = dx; +static inline void flappy_next_pipes(Flappy* env, Pipe** first, Pipe** second) { + *first = NULL; + *second = NULL; + + for (int i = 0; i < FLAPPY_NUM_PIPES; i++) { + Pipe* pipe = &env->pipes[i]; + float dx = pipe->x + env->pipe_width - env->bird_x; + if (dx < 0.0f) { + continue; + } + + if (*first == NULL || pipe->x < (*first)->x) { + *second = *first; + *first = pipe; + } else if (*second == NULL || pipe->x < (*second)->x) { + *second = pipe; } } - return best; } static inline void flappy_compute_observations(Flappy* env) { - Pipe* pipe = flappy_next_pipe(env); + Pipe* pipe; + Pipe* next_pipe; + flappy_next_pipes(env, &pipe, &next_pipe); + if (pipe == NULL) { + pipe = &env->pipes[0]; + } + if (next_pipe == NULL) { + next_pipe = pipe; + } + float dx = (pipe->x + env->pipe_width - env->bird_x) / env->width; float dy = (env->bird_y - pipe->gap_y) / env->height; + float next_dx = (next_pipe->x + env->pipe_width - env->bird_x) / env->width; env->observations[0] = env->bird_y / env->height; env->observations[1] = flappy_clampf(env->bird_vy / 16.0f, -1.0f, 1.0f); env->observations[2] = flappy_clampf(dx, 0.0f, 1.5f); env->observations[3] = pipe->gap_y / env->height; env->observations[4] = flappy_clampf(dy, -1.0f, 1.0f); - env->observations[5] = flappy_clampf((float)env->score / 20.0f, 0.0f, 1.0f); + env->observations[5] = flappy_clampf(next_dx, 0.0f, 2.0f); + env->observations[6] = next_pipe->gap_y / env->height; } static inline void flappy_add_log(Flappy* env) { @@ -191,7 +209,12 @@ void c_step(Flappy* env) { } } - Pipe* next = flappy_next_pipe(env); + Pipe* next; + Pipe* ignored; + flappy_next_pipes(env, &next, &ignored); + if (next == NULL) { + next = &env->pipes[0]; + } float center_error = fabsf(env->bird_y - next->gap_y) / (env->height * 0.5f); env->rewards[0] += env->center_reward * (1.0f - flappy_clampf(center_error, 0.0f, 1.0f)); From 4ec1f607e6b9969845df877b858c3d913e5ee1b3 Mon Sep 17 00:00:00 2001 
From: M K Date: Wed, 10 Jun 2026 19:08:46 +0200 Subject: [PATCH 3/4] Simplify Flappy observations --- ocean/flappy/flappy.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ocean/flappy/flappy.h b/ocean/flappy/flappy.h index e5898f47ce..9bca4c1631 100644 --- a/ocean/flappy/flappy.h +++ b/ocean/flappy/flappy.h @@ -6,7 +6,7 @@ #include #include "raylib.h" -#define FLAPPY_OBS_SIZE 7 +#define FLAPPY_OBS_SIZE 6 #define FLAPPY_NUM_PIPES 3 #define FLAPPY_NOOP 0 @@ -120,16 +120,14 @@ static inline void flappy_compute_observations(Flappy* env) { } float dx = (pipe->x + env->pipe_width - env->bird_x) / env->width; - float dy = (env->bird_y - pipe->gap_y) / env->height; float next_dx = (next_pipe->x + env->pipe_width - env->bird_x) / env->width; env->observations[0] = env->bird_y / env->height; env->observations[1] = flappy_clampf(env->bird_vy / 16.0f, -1.0f, 1.0f); env->observations[2] = flappy_clampf(dx, 0.0f, 1.5f); env->observations[3] = pipe->gap_y / env->height; - env->observations[4] = flappy_clampf(dy, -1.0f, 1.0f); - env->observations[5] = flappy_clampf(next_dx, 0.0f, 2.0f); - env->observations[6] = next_pipe->gap_y / env->height; + env->observations[4] = flappy_clampf(next_dx, 0.0f, 2.0f); + env->observations[5] = next_pipe->gap_y / env->height; } static inline void flappy_add_log(Flappy* env) { From 55174506fdf8d9a84cf90e78c470b138daac3cd5 Mon Sep 17 00:00:00 2001 From: M K Date: Wed, 10 Jun 2026 19:14:54 +0200 Subject: [PATCH 4/4] Remove Flappy observation clamping --- ocean/flappy/flappy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocean/flappy/flappy.h b/ocean/flappy/flappy.h index 9bca4c1631..050a910bc9 100644 --- a/ocean/flappy/flappy.h +++ b/ocean/flappy/flappy.h @@ -123,10 +123,10 @@ static inline void flappy_compute_observations(Flappy* env) { float next_dx = (next_pipe->x + env->pipe_width - env->bird_x) / env->width; env->observations[0] = env->bird_y / env->height; - env->observations[1] = 
flappy_clampf(env->bird_vy / 16.0f, -1.0f, 1.0f); - env->observations[2] = flappy_clampf(dx, 0.0f, 1.5f); + env->observations[1] = env->bird_vy / 16.0f; + env->observations[2] = dx; env->observations[3] = pipe->gap_y / env->height; - env->observations[4] = flappy_clampf(next_dx, 0.0f, 2.0f); + env->observations[4] = next_dx; env->observations[5] = next_pipe->gap_y / env->height; }