From 96b306af08864f79a13d6e3028fb2ab9aded850c Mon Sep 17 00:00:00 2001 From: Haolin Date: Sat, 13 Jun 2026 22:27:37 -0400 Subject: [PATCH 1/2] Add reproducible minimal PPO WSL workflow --- .gitignore | 10 + README.md | 6 + docs/src/minimal-ppo.md | 159 +++++++++++ pufferlib/ocean/drive/drive.h | 22 +- scripts/install_wsl_admin.ps1 | 25 ++ scripts/minimal_ppo_train.py | 398 +++++++++++++++++++++++++++ scripts/parallel_data_collect.py | 118 ++++++++ scripts/prepare_waymo_maps.py | 78 ++++++ scripts/prepare_waymo_maps_wsl.sh | 36 +++ scripts/run_minimal_ppo_wsl.sh | 26 ++ scripts/visualize_minimal_ppo.py | 244 ++++++++++++++++ scripts/visualize_minimal_ppo_wsl.sh | 32 +++ scripts/wsl_native_3d_setup.sh | 82 ++++++ 13 files changed, 1235 insertions(+), 1 deletion(-) create mode 100644 docs/src/minimal-ppo.md create mode 100644 scripts/install_wsl_admin.ps1 create mode 100644 scripts/minimal_ppo_train.py create mode 100644 scripts/parallel_data_collect.py create mode 100644 scripts/prepare_waymo_maps.py create mode 100644 scripts/prepare_waymo_maps_wsl.sh create mode 100644 scripts/run_minimal_ppo_wsl.sh create mode 100644 scripts/visualize_minimal_ppo.py create mode 100644 scripts/visualize_minimal_ppo_wsl.sh create mode 100644 scripts/wsl_native_3d_setup.sh diff --git a/.gitignore b/.gitignore index 2bd9405687..baa8a00f28 100644 --- a/.gitignore +++ b/.gitignore @@ -190,3 +190,13 @@ pufferlib/resources/drive/output*.gif emsdk/ docs/book/* !docs/book/assets/ + +# Local PufferDrive RL experiments +/native_3d_renders/ +/training_visualizations/ +/visualizations/ +/outputs/ +/pufferdrive_headless_demo.mp4 +/tfrecord-*.json +/waymo_viewer_server.*.log +/pufferlib/resources/drive/waymo_*_json/ diff --git a/README.md b/README.md index 0b431e7169..00aae40fd7 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,12 @@ Start a training run puffer train puffer_drive ``` +### Minimal PPO on Windows/WSL + +For an inspectable continuous-action PPO implementation, Waymo JSON map +preparation, and native third-person checkpoint rendering, see +[Minimal PPO training](docs/src/minimal-ppo.md). + ## Dataset
diff --git a/docs/src/minimal-ppo.md b/docs/src/minimal-ppo.md new file mode 100644 index 0000000000..700d656387 --- /dev/null +++ b/docs/src/minimal-ppo.md @@ -0,0 +1,159 @@ +# Minimal PPO training + +`scripts/minimal_ppo_train.py` is a small continuous-action PPO implementation +built directly on the rollout pattern in `scripts/parallel_data_collect.py`. +It is intended to make the training architecture explicit before moving to the +full PuffeRL trainer. + +## Architecture + +Each controlled agent produces one row of the vectorized batch: + +```text +Drive observations + | + v +2-layer MLP encoder + | + +----> Gaussian actor ----> tanh ----> [acceleration, steering] + | + +----> critic ------------> V(observation) +``` + +The trainer repeatedly: + +1. Collects a vectorized rollout from `Drive`. +2. Stores observations, raw policy actions, log probabilities, rewards, + terminal flags, and value predictions. +3. Computes generalized advantage estimates (GAE). +4. Updates actor and critic with the PPO clipped objective. +5. Saves checkpoints and runs a short deterministic evaluation. + +## What is included + +- `scripts/wsl_native_3d_setup.sh`: installs the WSL/Linux dependencies and + builds the native Raylib binding on the Linux filesystem. +- `scripts/prepare_waymo_maps_wsl.sh`: converts one or more exported WOMD JSON + scenarios into contiguous `map_000.bin`, `map_001.bin`, ... files. +- `scripts/minimal_ppo_train.py`: self-contained continuous-action PPO. +- `scripts/run_minimal_ppo_wsl.sh`: launches training in the Linux-native copy. +- `scripts/visualize_minimal_ppo.py`: deterministic evaluation with native 3D + rendering. +- `scripts/visualize_minimal_ppo_wsl.sh`: launches evaluation and copies the + video and JSON metrics back to Windows. + +Waymo scenarios, checkpoints, and rendered videos are deliberately not stored +in Git. Users must comply with the Waymo Open Dataset license and provide their +own exported scenario JSON files. + +## Clone and set up + +From PowerShell, clone the repository and enter WSL: + +```powershell +git clone https://github.com//PufferDrive.git +cd PufferDrive +wsl +``` + +If WSL/Ubuntu is not installed, open an elevated PowerShell and run: + +```powershell +powershell -ExecutionPolicy Bypass -File scripts/install_wsl_admin.ps1 +``` + +From the WSL shell: + +```bash +bash scripts/wsl_native_3d_setup.sh +``` + +The setup script mirrors a Windows-mounted checkout to +`~/PufferDrive-native`, creates `~/.venvs/pufferdrive-wsl`, installs the +dependencies, and builds the native extension there. + +## Prepare Waymo scenarios + +Pass one or more exported Waymo Motion Dataset scenario JSON files: + +```bash +bash scripts/prepare_waymo_maps_wsl.sh \ + ./scenario_a.json \ + ./scenario_b.json +``` + +The generated maps live in the Linux-native repository under +`resources/drive/binaries/training`. The order of the JSON arguments determines +the contiguous map indices. + +## WSL smoke test + +Run this from the Windows-mounted repository: + +```bash +bash scripts/run_minimal_ppo_wsl.sh \ + --map-dir resources/drive/binaries/training \ + --num-maps 2 \ + --num-envs 1 \ + --total-timesteps 10000 \ + --rollout-steps 128 \ + --minibatch-size 128 \ + --checkpoint-interval 10 +``` + +This verifies the training loop but is not enough data or experience to learn +a useful driving policy. + +## Visualize a checkpoint + +```bash +bash scripts/visualize_minimal_ppo_wsl.sh \ + --map-dir resources/drive/binaries/training \ + --num-maps 2 \ + --episode-length 91 \ + --draw-traces +``` + +The command writes an MP4 and a JSON metrics file to +`training_visualizations/` in the Windows checkout. + +## Small experiment + +After preparing a directory with multiple contiguous maps named +`map_000.bin`, `map_001.bin`, and so on: + +```bash +bash scripts/run_minimal_ppo_wsl.sh \ + --map-dir resources/drive/binaries/training \ + --num-maps 100 \ + --num-envs 16 \ + --total-timesteps 1000000 \ + --rollout-steps 128 \ + --minibatch-size 512 +``` + +The default `goal_behavior=1` samples new lane-based goals after reaching the +current goal. Use `--goal-behavior 2` to train only against each JSON scenario's +fixed `goalPosition`. + +## Reading the logs + +- `reward/step`: mean immediate reward in the latest rollout. +- `episode_return`: mean completed-episode return over the latest 100 episodes. +- `policy_loss`: PPO actor loss. +- `value_loss`: critic regression loss. +- `entropy`: policy exploration; it normally decreases gradually. +- `kl`: approximate policy change per update. +- `clipfrac`: fraction of samples clipped by PPO. + +The smoke test succeeds when steps advance, losses remain finite, and +checkpoints are written. A driving model should instead be judged on held-out +maps using goal completion, collision rate, and off-road rate. + +## Current limitation + +The minimal trainer validates the architecture, but its current reward is not +yet sufficient for high-quality driving. The first smoke-test policy may learn +undesired motion such as reversing. The next iteration should add explicit +route-progress reward, reverse-motion penalty, and stronger collision/off-road +costs before scaling training. diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 2a55867120..327972c351 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -2998,6 +2999,22 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, } } +static int write_full_frame(int fd, const unsigned char *data, size_t size) { + size_t total_written = 0; + while (total_written < size) { + ssize_t written = write(fd, data + total_written, size - total_written); + if (written > 0) { + total_written += (size_t)written; + continue; + } + if (written < 0 && errno == EINTR) { + continue; + } + return -1; + } + return 0; +} + void c_render(Drive *env, int view_mode, int draw_traces) { // Create client on first render call @@ -3095,7 +3112,10 @@ void c_render(Drive *env, int view_mode, int draw_traces) { unsigned char *screen_data = rlReadScreenPixels((int)client->width, (int)client->height); if (screen_data) { - write(client->recorder_pipefd[1], screen_data, (int)client->width * (int)client->height * 4); + size_t frame_size = (size_t)client->width * (size_t)client->height * 4; + if (write_full_frame(client->recorder_pipefd[1], screen_data, frame_size) != 0) { + fprintf(stderr, "Failed to write complete render frame to ffmpeg: %s\n", strerror(errno)); + } RL_FREE(screen_data); } } else { // Pop-up window diff --git a/scripts/install_wsl_admin.ps1 b/scripts/install_wsl_admin.ps1 new file mode 100644 index 0000000000..b5857ec17f --- /dev/null +++ b/scripts/install_wsl_admin.ps1 @@ -0,0 +1,25 @@ +# Run this file from an elevated Windows PowerShell window. +# It enables WSL2 prerequisites and installs Ubuntu. + +$ErrorActionPreference = "Stop" + +Write-Host "Enabling Windows Subsystem for Linux..." +dism.exe /online /enable-feature /featurename:Microsoft-Windows-Subsystem-Linux /all /norestart + +Write-Host "Enabling Virtual Machine Platform..." +dism.exe /online /enable-feature /featurename:VirtualMachinePlatform /all /norestart + +Write-Host "Setting WSL 2 as the default version..." +wsl.exe --set-default-version 2 + +Write-Host "Installing Ubuntu..." +wsl.exe --install -d Ubuntu + +$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..")).Path +$driveLetter = $repoRoot.Substring(0, 1).ToLowerInvariant() +$wslRepoRoot = "/mnt/$driveLetter/" + $repoRoot.Substring(3).Replace("\", "/") + +Write-Host "" +Write-Host "If Windows asks for a restart, reboot, open Ubuntu once to create the Linux user, then run:" +Write-Host " cd '$wslRepoRoot'" +Write-Host " bash scripts/wsl_native_3d_setup.sh" diff --git a/scripts/minimal_ppo_train.py b/scripts/minimal_ppo_train.py new file mode 100644 index 0000000000..dba0bf13de --- /dev/null +++ b/scripts/minimal_ppo_train.py @@ -0,0 +1,398 @@ +"""Minimal continuous-action PPO trainer for PufferDrive. + +This intentionally keeps the full RL loop in one file so the relationship +between collection, GAE, PPO updates, and checkpoints is easy to inspect. +""" + +import argparse +import json +import os +import random +import sys +import time +from collections import deque +from dataclasses import asdict, dataclass +from pathlib import Path + +import numpy as np +import torch +from torch import nn +from torch.distributions import Normal + + +REPO_ROOT = Path(__file__).resolve().parents[1] +os.chdir(REPO_ROOT) +sys.path.insert(0, str(REPO_ROOT)) + + +@dataclass +class TrainConfig: + map_dir: str + num_maps: int + num_envs: int + controlled_agents_per_env: int + episode_length: int + resample_frequency: int + goal_behavior: int + goal_target_distance: float + total_timesteps: int + rollout_steps: int + update_epochs: int + minibatch_size: int + learning_rate: float + gamma: float + gae_lambda: float + clip_coef: float + value_coef: float + entropy_coef: float + max_grad_norm: float + hidden_size: int + seed: int + device: str + checkpoint_dir: str + checkpoint_interval: int + eval_steps: int + resume: str | None + + +class ActorCritic(nn.Module): + def __init__(self, observation_dim, action_dim, hidden_size): + super().__init__() + self.encoder = nn.Sequential( + nn.Linear(observation_dim, hidden_size), + nn.LayerNorm(hidden_size), + nn.Tanh(), + nn.Linear(hidden_size, hidden_size), + nn.Tanh(), + ) + self.actor_mean = nn.Linear(hidden_size, action_dim) + self.log_std = nn.Parameter(torch.full((action_dim,), -0.5)) + self.critic = nn.Linear(hidden_size, 1) + + nn.init.orthogonal_(self.actor_mean.weight, gain=0.01) + nn.init.constant_(self.actor_mean.bias, 0) + nn.init.orthogonal_(self.critic.weight, gain=1.0) + nn.init.constant_(self.critic.bias, 0) + + def distribution_and_value(self, observations): + hidden = self.encoder(observations) + mean = self.actor_mean(hidden) + std = self.log_std.exp().expand_as(mean) + return Normal(mean, std), self.critic(hidden).squeeze(-1) + + @torch.no_grad() + def sample_action(self, observations): + distribution, value = self.distribution_and_value(observations) + raw_action = distribution.sample() + env_action = torch.tanh(raw_action) + log_prob = distribution.log_prob(raw_action).sum(dim=-1) + return env_action, raw_action, log_prob, value + + @torch.no_grad() + def deterministic_action(self, observations): + distribution, value = self.distribution_and_value(observations) + return torch.tanh(distribution.mean), value + + def evaluate_raw_action(self, observations, raw_actions): + distribution, value = self.distribution_and_value(observations) + log_prob = distribution.log_prob(raw_actions).sum(dim=-1) + entropy = distribution.entropy().sum(dim=-1) + return log_prob, entropy, value + + +class RolloutBuffer: + def __init__(self, steps, agents, observation_dim, action_dim, device): + self.observations = torch.zeros((steps, agents, observation_dim), dtype=torch.float32, device=device) + self.raw_actions = torch.zeros((steps, agents, action_dim), dtype=torch.float32, device=device) + self.log_probs = torch.zeros((steps, agents), dtype=torch.float32, device=device) + self.rewards = torch.zeros((steps, agents), dtype=torch.float32, device=device) + self.dones = torch.zeros((steps, agents), dtype=torch.float32, device=device) + self.values = torch.zeros((steps, agents), dtype=torch.float32, device=device) + self.advantages = torch.zeros((steps, agents), dtype=torch.float32, device=device) + self.returns = torch.zeros((steps, agents), dtype=torch.float32, device=device) + + def compute_gae(self, last_value, gamma, gae_lambda): + last_advantage = torch.zeros_like(last_value) + for step in reversed(range(self.rewards.shape[0])): + if step == self.rewards.shape[0] - 1: + next_value = last_value + else: + next_value = self.values[step + 1] + next_nonterminal = 1.0 - self.dones[step] + delta = self.rewards[step] + gamma * next_value * next_nonterminal - self.values[step] + last_advantage = delta + gamma * gae_lambda * next_nonterminal * last_advantage + self.advantages[step] = last_advantage + self.returns.copy_(self.advantages + self.values) + + def flatten(self): + return { + "observations": self.observations.flatten(0, 1), + "raw_actions": self.raw_actions.flatten(0, 1), + "log_probs": self.log_probs.flatten(), + "values": self.values.flatten(), + "advantages": self.advantages.flatten(), + "returns": self.returns.flatten(), + } + + +def resolve_device(requested): + if requested == "auto": + return torch.device("cuda" if torch.cuda.is_available() else "cpu") + return torch.device(requested) + + +def make_env(config): + from pufferlib.ocean.drive.drive import Drive + + map_dir = Path(config.map_dir) + if not map_dir.exists(): + raise FileNotFoundError(f"Map directory does not exist: {map_dir}") + + num_agents = config.num_envs * config.controlled_agents_per_env + return Drive( + map_dir=str(map_dir), + num_maps=config.num_maps, + num_agents=num_agents, + control_mode="control_mixed_play", + init_mode="create_all_valid", + goal_behavior=config.goal_behavior, + goal_target_distance=config.goal_target_distance, + action_type="continuous", + episode_length=config.episode_length, + termination_mode=0, + resample_frequency=config.resample_frequency, + render_mode=1, + max_controlled_agents=config.controlled_agents_per_env, + ) + + +def save_checkpoint(path, model, optimizer, config, global_step, update): + path.parent.mkdir(parents=True, exist_ok=True) + torch.save( + { + "model": model.state_dict(), + "optimizer": optimizer.state_dict(), + "config": asdict(config), + "global_step": global_step, + "update": update, + }, + path, + ) + + +def ppo_update(model, optimizer, buffer, config): + batch = buffer.flatten() + advantages = batch["advantages"] + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + batch_size = advantages.shape[0] + indices = torch.arange(batch_size, device=advantages.device) + + metrics = { + "policy_loss": [], + "value_loss": [], + "entropy": [], + "approx_kl": [], + "clip_fraction": [], + } + + for _ in range(config.update_epochs): + permutation = indices[torch.randperm(batch_size, device=indices.device)] + for start in range(0, batch_size, config.minibatch_size): + mb = permutation[start : start + config.minibatch_size] + new_log_prob, entropy, new_value = model.evaluate_raw_action( + batch["observations"][mb], + batch["raw_actions"][mb], + ) + + log_ratio = new_log_prob - batch["log_probs"][mb] + ratio = log_ratio.exp() + mb_advantages = advantages[mb] + policy_loss = -torch.min( + ratio * mb_advantages, + torch.clamp(ratio, 1 - config.clip_coef, 1 + config.clip_coef) * mb_advantages, + ).mean() + + value_loss = 0.5 * (new_value - batch["returns"][mb]).pow(2).mean() + entropy_loss = entropy.mean() + loss = policy_loss + config.value_coef * value_loss - config.entropy_coef * entropy_loss + + optimizer.zero_grad(set_to_none=True) + loss.backward() + nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm) + optimizer.step() + + with torch.no_grad(): + metrics["policy_loss"].append(policy_loss.item()) + metrics["value_loss"].append(value_loss.item()) + metrics["entropy"].append(entropy_loss.item()) + metrics["approx_kl"].append(((ratio - 1) - log_ratio).mean().item()) + metrics["clip_fraction"].append(((ratio - 1).abs() > config.clip_coef).float().mean().item()) + + return {name: float(np.mean(values)) for name, values in metrics.items()} + + +@torch.no_grad() +def evaluate(model, env, observation, device, steps): + rewards = [] + for _ in range(steps): + obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device) + actions, _ = model.deterministic_action(obs_tensor) + observation, reward, terminal, truncation, info = env.step(actions.cpu().numpy().astype(np.float32)) + rewards.append(float(np.mean(reward))) + return observation, float(np.mean(rewards)), float(np.sum(rewards)) + + +def train(config): + random.seed(config.seed) + np.random.seed(config.seed) + torch.manual_seed(config.seed) + device = resolve_device(config.device) + + print("Creating PufferDrive environment...") + env = make_env(config) + observation, _ = env.reset(seed=config.seed) + num_agents, observation_dim = observation.shape + action_dim = env.single_action_space.shape[0] + + print(f"device={device}") + print(f"agents={num_agents}, envs={env.num_envs}, obs_dim={observation_dim}, action_dim={action_dim}") + print(f"map_ids={env.map_ids}") + print(f"scenario_ids={env.scenario_ids}") + + model = ActorCritic(observation_dim, action_dim, config.hidden_size).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, eps=1e-5) + global_step = 0 + first_update = 1 + + if config.resume: + checkpoint = torch.load(config.resume, map_location=device, weights_only=False) + model.load_state_dict(checkpoint["model"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + global_step = int(checkpoint.get("global_step", 0)) + first_update = int(checkpoint.get("update", 0)) + 1 + print(f"Resumed from {config.resume} at step {global_step}") + + steps_per_update = config.rollout_steps * num_agents + total_updates = max(1, (config.total_timesteps - global_step + steps_per_update - 1) // steps_per_update) + recent_episode_returns = deque(maxlen=100) + recent_episode_lengths = deque(maxlen=100) + running_returns = np.zeros(num_agents, dtype=np.float32) + running_lengths = np.zeros(num_agents, dtype=np.int32) + started_at = time.time() + + try: + for update_offset in range(total_updates): + update = first_update + update_offset + buffer = RolloutBuffer( + config.rollout_steps, + num_agents, + observation_dim, + action_dim, + device, + ) + + for step in range(config.rollout_steps): + obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device) + env_action, raw_action, log_prob, value = model.sample_action(obs_tensor) + next_observation, reward, terminal, truncation, info = env.step( + env_action.cpu().numpy().astype(np.float32) + ) + done = np.logical_or(terminal, truncation) + + buffer.observations[step].copy_(obs_tensor) + buffer.raw_actions[step].copy_(raw_action) + buffer.log_probs[step].copy_(log_prob) + buffer.values[step].copy_(value) + buffer.rewards[step].copy_(torch.as_tensor(reward, dtype=torch.float32, device=device)) + buffer.dones[step].copy_(torch.as_tensor(done, dtype=torch.float32, device=device)) + + running_returns += reward + running_lengths += 1 + for agent_index in np.flatnonzero(done): + recent_episode_returns.append(float(running_returns[agent_index])) + recent_episode_lengths.append(int(running_lengths[agent_index])) + running_returns[agent_index] = 0 + running_lengths[agent_index] = 0 + + observation = next_observation + global_step += num_agents + + with torch.no_grad(): + last_obs = torch.as_tensor(observation, dtype=torch.float32, device=device) + _, last_value = model.distribution_and_value(last_obs) + buffer.compute_gae(last_value, config.gamma, config.gae_lambda) + metrics = ppo_update(model, optimizer, buffer, config) + + elapsed = max(time.time() - started_at, 1e-6) + sps = int(global_step / elapsed) + mean_reward = float(buffer.rewards.mean().item()) + episode_return = float(np.mean(recent_episode_returns)) if recent_episode_returns else float("nan") + episode_length = float(np.mean(recent_episode_lengths)) if recent_episode_lengths else float("nan") + print( + f"update={update:04d} step={global_step:09d} sps={sps:6d} " + f"reward/step={mean_reward:+.4f} episode_return={episode_return:+.3f} " + f"episode_len={episode_length:.1f} policy_loss={metrics['policy_loss']:+.4f} " + f"value_loss={metrics['value_loss']:.4f} entropy={metrics['entropy']:.3f} " + f"kl={metrics['approx_kl']:.5f} clipfrac={metrics['clip_fraction']:.3f}" + ) + + if update % config.checkpoint_interval == 0 or global_step >= config.total_timesteps: + checkpoint_path = Path(config.checkpoint_dir) / f"ppo_step_{global_step}.pt" + save_checkpoint(checkpoint_path, model, optimizer, config, global_step, update) + print(f"saved checkpoint: {checkpoint_path}") + + if global_step >= config.total_timesteps: + break + + observation, eval_mean_reward, eval_return = evaluate( + model, + env, + observation, + device, + config.eval_steps, + ) + print(f"deterministic_eval mean_reward={eval_mean_reward:+.4f} return={eval_return:+.3f}") + + final_path = Path(config.checkpoint_dir) / "ppo_final.pt" + save_checkpoint(final_path, model, optimizer, config, global_step, update) + print(f"saved final checkpoint: {final_path}") + finally: + env.close() + + +def parse_args(): + parser = argparse.ArgumentParser(description="Minimal PPO trainer based on parallel_data_collect.py") + parser.add_argument("--map-dir", default="resources/drive/binaries") + parser.add_argument("--num-maps", type=int, default=1) + parser.add_argument("--num-envs", type=int, default=4) + parser.add_argument("--controlled-agents-per-env", type=int, default=1) + parser.add_argument("--episode-length", type=int, default=91) + parser.add_argument("--resample-frequency", type=int, default=910) + parser.add_argument("--goal-behavior", type=int, choices=[0, 1, 2], default=1) + parser.add_argument("--goal-target-distance", type=float, default=30.0) + parser.add_argument("--total-timesteps", type=int, default=100_000) + parser.add_argument("--rollout-steps", type=int, default=128) + parser.add_argument("--update-epochs", type=int, default=4) + parser.add_argument("--minibatch-size", type=int, default=512) + parser.add_argument("--learning-rate", type=float, default=3e-4) + parser.add_argument("--gamma", type=float, default=0.98) + parser.add_argument("--gae-lambda", type=float, default=0.95) + parser.add_argument("--clip-coef", type=float, default=0.2) + parser.add_argument("--value-coef", type=float, default=0.5) + parser.add_argument("--entropy-coef", type=float, default=0.005) + parser.add_argument("--max-grad-norm", type=float, default=1.0) + parser.add_argument("--hidden-size", type=int, default=256) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", choices=["auto", "cpu", "cuda"], default="auto") + parser.add_argument("--checkpoint-dir", default="checkpoints/minimal_ppo") + parser.add_argument("--checkpoint-interval", type=int, default=10) + parser.add_argument("--eval-steps", type=int, default=91) + parser.add_argument("--resume") + args = parser.parse_args() + return TrainConfig(**vars(args)) + + +if __name__ == "__main__": + config = parse_args() + print(json.dumps(asdict(config), indent=2)) + train(config) diff --git a/scripts/parallel_data_collect.py b/scripts/parallel_data_collect.py new file mode 100644 index 0000000000..f0097954a7 --- /dev/null +++ b/scripts/parallel_data_collect.py @@ -0,0 +1,118 @@ +import os +import sys +from pathlib import Path +import numpy as np + +# Run from the PufferDrive project root so relative resource/config paths work. +working_dir = Path.cwd() +while not (working_dir / "pufferlib").exists(): + if working_dir == working_dir.parent: + raise FileNotFoundError("Could not find the PufferDrive project root containing 'pufferlib'") + working_dir = working_dir.parent +os.chdir(working_dir) +sys.path.append(str(working_dir / "pufferlib" / "ocean" / "drive")) + +from pufferlib.ocean.drive.drive import Drive, RenderView + +def main(): + MAP_DIR = "resources/drive/binaries/training" + + if not Path(MAP_DIR).exists(): + print(f"Warning: {MAP_DIR} not found.") + return + + # PufferDrive samples vectorized envs from the first NUM_MAPS map files. + # num_agents is the target total number of controlled rows in the batch. + NUM_ENVS = 10 + NUM_MAPS = 100 + CONTROLLED_AGENTS_PER_ENV = 1 + NUM_AGENTS = NUM_ENVS * CONTROLLED_AGENTS_PER_ENV + EPISODE_LEN = 91 + + print("Initializing PufferDrive Environment...") + env = Drive( + map_dir=MAP_DIR, + num_maps=NUM_MAPS, + num_agents=NUM_AGENTS, + # max_controlled_agents is enforced for control_mixed_play in the C env. + control_mode="control_mixed_play", + init_mode="create_all_valid", + goal_behavior=2, + action_type="continuous", + episode_length=EPISODE_LEN, + render_mode=RenderView.FULL_SIM_STATE, + max_controlled_agents=CONTROLLED_AGENTS_PER_ENV, + ) + + obs, _ = env.reset() + print("env obs space:", env.observation_space) + print("env action space:", env.action_space) + print("single env obs shape:", env.single_observation_space) + print("single env action shape:", env.single_action_space) + + num_controlled_agents = obs.shape[0] + if env.num_envs != NUM_ENVS or num_controlled_agents != NUM_AGENTS: + env.close() + raise RuntimeError( + f"Expected {NUM_ENVS} envs/{NUM_AGENTS} controlled agents, " + f"got {env.num_envs} envs/{num_controlled_agents} controlled agents." + ) + + print(f"Environment initialized. Batch size (controlled agents): {num_controlled_agents}") + print(f"Vectorized env count: {env.num_envs}") + print(f"Sampled map ids: {env.map_ids}") + print(f"Scenario ids: {env.scenario_ids}") + + # 2. Initialize lists to store our data + collected_obs = [] + collected_actions = [] + collected_rewards = [] + collected_terminals = [] + collected_truncations = [] + + print("Starting Parallel Native Render Rollout...") + for t in range(EPISODE_LEN): + # 3. Store the observation. + # CRITICAL: Use .copy() because PufferLib writes to the same C-memory buffer in-place. + collected_obs.append(obs.copy()) + + # Generate actions for all parallel agents simultaneously + rl_actions = np.random.uniform(-1.0, 1.0, size=(num_controlled_agents, 2)).astype(np.float32) + + # Store the actions + collected_actions.append(rl_actions.copy()) + + # Step the environment + obs, rewards, terminals, truncations, infos = env.step(rl_actions) + collected_rewards.append(rewards.copy()) + collected_terminals.append(terminals.copy()) + collected_truncations.append(truncations.copy()) + + # Render only the first environment (env_id=0) out of the 10 + # env.render(view_mode=RenderView.FULL_SIM_STATE, draw_traces=True, env_id=0) + + if t % 10 == 0: + print(f"Processed step {t:02d}/{EPISODE_LEN}...") + + if terminals.all() or truncations.all(): + print(f"All environments terminated at step {t}.") + break + + env.close() + + # 4. Stack the lists into final numpy arrays for easy saving/manipulation + final_obs_array = np.stack(collected_obs) # Shape: (T, 10, obs_dim) + final_actions_array = np.stack(collected_actions) # Shape: (T, 10, 2) + final_rewards_array = np.stack(collected_rewards) # Shape: (T, 10) + final_terminals_array = np.stack(collected_terminals) + final_truncations_array = np.stack(collected_truncations) + + print("\n=== Data Collection Complete ===") + print(f"Collected Obs Shape: {final_obs_array.shape} -> (Time, Agents, Features)") + print(f"Collected Actions Shape: {final_actions_array.shape} -> (Time, Agents, Actions)") + print(f"Collected Rewards Shape: {final_rewards_array.shape} -> (Time, Agents)") + print(f"Collected Terminals Shape: {final_terminals_array.shape} -> (Time, Agents)") + print(f"Collected Truncations Shape: {final_truncations_array.shape} -> (Time, Agents)") + +if __name__ == "__main__": + main() diff --git a/scripts/prepare_waymo_maps.py b/scripts/prepare_waymo_maps.py new file mode 100644 index 0000000000..5e6cdd6e51 --- /dev/null +++ b/scripts/prepare_waymo_maps.py @@ -0,0 +1,78 @@ +"""Convert exported Waymo Motion JSON scenarios into PufferDrive binaries.""" + +import argparse +import json +import os +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] +os.chdir(REPO_ROOT) +sys.path.insert(0, str(REPO_ROOT)) + +from pufferlib.ocean.drive.drive import load_map + + +def prepare_scenario(source_path, prepared_path): + with source_path.open("r", encoding="utf-8") as file: + data = json.load(file) + + sdc_track_index = data.get("metadata", {}).get("sdc_track_index") + for index, obj in enumerate(data.get("objects", [])): + obj["mark_as_expert"] = index != sdc_track_index + + prepared_path.parent.mkdir(parents=True, exist_ok=True) + prepared_path.write_text(json.dumps(data), encoding="utf-8") + return str(data.get("scenario_id") or source_path.stem) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("json_files", nargs="+", type=Path) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("resources/drive/binaries/training"), + ) + parser.add_argument( + "--prepared-json-dir", + type=Path, + default=Path("resources/drive/waymo_training_json"), + ) + args = parser.parse_args() + + args.output_dir.mkdir(parents=True, exist_ok=True) + args.prepared_json_dir.mkdir(parents=True, exist_ok=True) + + manifest = [] + for map_index, source_path in enumerate(args.json_files): + if not source_path.is_file(): + raise FileNotFoundError(source_path) + + prepared_path = args.prepared_json_dir / f"map_{map_index:03d}.json" + binary_path = args.output_dir / f"map_{map_index:03d}.bin" + scenario_id = prepare_scenario(source_path, prepared_path) + load_map( + str(prepared_path), + unique_map_id=map_index, + binary_output=str(binary_path), + ) + manifest.append( + { + "map_index": map_index, + "scenario_id": scenario_id, + "source": str(source_path), + "binary": str(binary_path), + } + ) + print(f"[{map_index + 1}/{len(args.json_files)}] {scenario_id} -> {binary_path}") + + manifest_path = args.output_dir / "manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8") + print(f"Prepared {len(manifest)} maps") + print(f"Manifest: {manifest_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/prepare_waymo_maps_wsl.sh b/scripts/prepare_waymo_maps_wsl.sh new file mode 100644 index 0000000000..b6d5403ab9 --- /dev/null +++ b/scripts/prepare_waymo_maps_wsl.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +set -euo pipefail + +WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)" +NATIVE_REPO="${HOME}/PufferDrive-native" +VENV_DIR="${HOME}/.venvs/pufferdrive-wsl" + +if [[ $# -eq 0 ]]; then + echo "Usage: bash scripts/prepare_waymo_maps_wsl.sh SCENARIO.json [SCENARIO.json ...]" + exit 2 +fi + +if [[ ! -x "$VENV_DIR/bin/python" || ! -d "$NATIVE_REPO/pufferlib" ]]; then + echo "Native WSL environment is not ready." + echo "Run bash scripts/wsl_native_3d_setup.sh first." + exit 1 +fi + +json_files=() +for path in "$@"; do + if [[ ! -f "$path" ]]; then + echo "Scenario JSON not found: $path" + exit 1 + fi + json_files+=("$(realpath "$path")") +done + +cp -f "$WINDOWS_REPO/scripts/prepare_waymo_maps.py" "$NATIVE_REPO/scripts/prepare_waymo_maps.py" + +cd "$NATIVE_REPO" +source "$VENV_DIR/bin/activate" +export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}" + +python scripts/prepare_waymo_maps.py "${json_files[@]}" + +echo "Prepared maps in $NATIVE_REPO/resources/drive/binaries/training" diff --git a/scripts/run_minimal_ppo_wsl.sh b/scripts/run_minimal_ppo_wsl.sh new file mode 100644 index 0000000000..646b253dca --- /dev/null +++ b/scripts/run_minimal_ppo_wsl.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)" +NATIVE_REPO="${HOME}/PufferDrive-native" +VENV_DIR="${HOME}/.venvs/pufferdrive-wsl" + +if [[ ! -d "$NATIVE_REPO/pufferlib" ]]; then + echo "Native repository not found: $NATIVE_REPO" + echo "Run bash scripts/wsl_native_3d_setup.sh first." + exit 1 +fi + +if [[ ! -x "$VENV_DIR/bin/python" ]]; then + echo "WSL Python environment not found: $VENV_DIR" + echo "Run bash scripts/wsl_native_3d_setup.sh first." + exit 1 +fi + +cp -f "$WINDOWS_REPO/scripts/minimal_ppo_train.py" "$NATIVE_REPO/scripts/minimal_ppo_train.py" + +cd "$NATIVE_REPO" +source "$VENV_DIR/bin/activate" +export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}" + +python scripts/minimal_ppo_train.py "$@" diff --git a/scripts/visualize_minimal_ppo.py b/scripts/visualize_minimal_ppo.py new file mode 100644 index 0000000000..e16e8d5662 --- /dev/null +++ b/scripts/visualize_minimal_ppo.py @@ -0,0 +1,244 @@ +"""Render a trained minimal PPO checkpoint with PufferDrive's native 3D view.""" + +import argparse +import json +import os +import shutil +import subprocess +import sys +import tempfile +import time +from pathlib import Path + +import numpy as np +import torch + + +REPO_ROOT = Path(__file__).resolve().parents[1] +os.chdir(REPO_ROOT) +sys.path.insert(0, str(REPO_ROOT)) + +from pufferlib.ocean.drive.drive import Drive, RenderView +from scripts.minimal_ppo_train import ActorCritic + + +def start_xvfb_if_needed(): + if os.environ.get("DISPLAY"): + return None + + for display_num in range(99, 120): + display = f":{display_num}" + lock_path = Path(f"/tmp/.X{display_num}-lock") + socket_path = Path(f"/tmp/.X11-unix/X{display_num}") + if lock_path.exists() or socket_path.exists(): + continue + + error_log = tempfile.NamedTemporaryFile(prefix="pufferdrive_eval_xvfb_", suffix=".log", delete=False) + error_log_path = Path(error_log.name) + proc = subprocess.Popen( + [ + "Xvfb", + display, + "-screen", + "0", + "1280x720x24", + "+extension", + "GLX", + "-ac", + "-noreset", + ], + stdout=subprocess.DEVNULL, + stderr=error_log, + ) + error_log.close() + os.environ["DISPLAY"] = display + + for _ in range(40): + if proc.poll() is not None: + break + if lock_path.exists() or socket_path.exists(): + time.sleep(0.5) + error_log_path.unlink(missing_ok=True) + return proc + time.sleep(0.1) + + proc.terminate() + proc.wait(timeout=2) + os.environ.pop("DISPLAY", None) + detail = error_log_path.read_text(errors="replace").strip() + error_log_path.unlink(missing_ok=True) + if detail: + print(detail) + + raise RuntimeError("Could not start Xvfb") + + +def stop_xvfb(proc): + if proc is None: + return + proc.terminate() + try: + proc.wait(timeout=2) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait(timeout=2) + os.environ.pop("DISPLAY", None) + + +def probe_video(path): + command = [ + "ffprobe", + "-v", + "error", + "-count_frames", + "-select_streams", + "v:0", + "-show_entries", + "stream=nb_read_frames,duration,r_frame_rate", + "-of", + "json", + str(path), + ] + result = subprocess.run(command, check=True, capture_output=True, text=True) + stream = json.loads(result.stdout)["streams"][0] + return { + "video_frames": int(stream.get("nb_read_frames", 0)), + "video_duration_seconds": float(stream.get("duration", 0.0)), + "video_frame_rate": stream.get("r_frame_rate"), + } + + +def render_checkpoint(args): + device = torch.device(args.device) + checkpoint = torch.load(args.checkpoint, map_location=device, weights_only=False) + train_config = checkpoint.get("config", {}) + + goal_behavior = args.goal_behavior + if goal_behavior is None: + goal_behavior = int(train_config.get("goal_behavior", 1)) + + goal_target_distance = args.goal_target_distance + if goal_target_distance is None: + goal_target_distance = float(train_config.get("goal_target_distance", 30.0)) + + env = Drive( + map_dir=args.map_dir, + num_maps=args.num_maps, + num_agents=args.num_envs * args.controlled_agents_per_env, + control_mode="control_mixed_play", + init_mode="create_all_valid", + goal_behavior=goal_behavior, + goal_target_distance=goal_target_distance, + action_type="continuous", + episode_length=args.episode_length, + termination_mode=0, + resample_frequency=0, + render_mode=1, + human_agent_idx=args.agent_index, + max_controlled_agents=args.controlled_agents_per_env, + ) + + observation, _ = env.reset(seed=args.seed) + observation_dim = observation.shape[1] + action_dim = env.single_action_space.shape[0] + hidden_size = int(train_config.get("hidden_size", 256)) + model = ActorCritic(observation_dim, action_dim, hidden_size).to(device) + model.load_state_dict(checkpoint["model"]) + model.eval() + + scenario_id = env.scenario_ids[args.env_id] + native_output = Path(f"{scenario_id}.mp4") + native_output.unlink(missing_ok=True) + returns = np.zeros(observation.shape[0], dtype=np.float32) + reward_history = [] + action_history = [] + xvfb_proc = start_xvfb_if_needed() + + try: + env.render( + view_mode=RenderView.AGENT_PERSP, + draw_traces=args.draw_traces, + env_id=args.env_id, + ) + for step in range(args.episode_length): + obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device) + with torch.no_grad(): + actions, values = model.deterministic_action(obs_tensor) + actions_np = actions.cpu().numpy().astype(np.float32) + observation, rewards, terminals, truncations, infos = env.step(actions_np) + env.render( + view_mode=RenderView.AGENT_PERSP, + draw_traces=args.draw_traces, + env_id=args.env_id, + ) + returns += rewards + reward_history.append(float(np.mean(rewards))) + action_history.append(actions_np[args.agent_index].tolist()) + if step % 10 == 0: + print( + f"step={step:02d}/{args.episode_length} " + f"mean_reward={np.mean(rewards):+.4f} " + f"ego_action={actions_np[args.agent_index]}" + ) + finally: + env.close() + stop_xvfb(xvfb_proc) + + if not native_output.exists(): + raise FileNotFoundError(f"Native renderer did not produce {native_output}") + + args.output_dir.mkdir(parents=True, exist_ok=True) + video_path = args.output_dir / f"{scenario_id}_ppo_step_{checkpoint.get('global_step', 0)}.mp4" + shutil.move(native_output, video_path) + video_probe = probe_video(video_path) + if video_probe["video_frames"] < args.episode_length: + raise RuntimeError( + f"Native renderer produced only {video_probe['video_frames']} frames; " + f"expected at least {args.episode_length}" + ) + + metrics = { + "checkpoint": str(args.checkpoint), + "checkpoint_global_step": int(checkpoint.get("global_step", 0)), + "scenario_id": scenario_id, + "episode_length": args.episode_length, + "goal_behavior": goal_behavior, + "goal_target_distance": goal_target_distance, + "mean_reward_per_step": float(np.mean(reward_history)), + "mean_agent_return": float(np.mean(returns)), + "ego_return": float(returns[args.agent_index]), + "mean_absolute_ego_action": np.mean(np.abs(action_history), axis=0).tolist(), + "video": str(video_path), + **video_probe, + } + metrics_path = video_path.with_suffix(".json") + metrics_path.write_text(json.dumps(metrics, indent=2), encoding="utf-8") + print(json.dumps(metrics, indent=2)) + return video_path, metrics_path + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--checkpoint", + type=Path, + default=Path("checkpoints/minimal_ppo/ppo_final.pt"), + ) + parser.add_argument("--map-dir", default="resources/drive/binaries/waymo_native") + parser.add_argument("--num-maps", type=int, default=1) + parser.add_argument("--num-envs", type=int, default=1) + parser.add_argument("--controlled-agents-per-env", type=int, default=1) + parser.add_argument("--episode-length", type=int, default=91) + parser.add_argument("--goal-behavior", type=int, choices=[0, 1, 2]) + parser.add_argument("--goal-target-distance", type=float) + parser.add_argument("--env-id", type=int, default=0) + parser.add_argument("--agent-index", type=int, default=0) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu") + parser.add_argument("--draw-traces", action="store_true") + parser.add_argument("--output-dir", type=Path, default=Path("training_visualizations")) + return parser.parse_args() + + +if __name__ == "__main__": + render_checkpoint(parse_args()) diff --git a/scripts/visualize_minimal_ppo_wsl.sh b/scripts/visualize_minimal_ppo_wsl.sh new file mode 100644 index 0000000000..03d15936b8 --- /dev/null +++ b/scripts/visualize_minimal_ppo_wsl.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)" +NATIVE_REPO="${HOME}/PufferDrive-native" +VENV_DIR="${HOME}/.venvs/pufferdrive-wsl" + +if [[ ! -f "$NATIVE_REPO/checkpoints/minimal_ppo/ppo_final.pt" ]]; then + echo "Checkpoint not found: $NATIVE_REPO/checkpoints/minimal_ppo/ppo_final.pt" + exit 1 +fi + +cp -f "$WINDOWS_REPO/scripts/minimal_ppo_train.py" "$NATIVE_REPO/scripts/minimal_ppo_train.py" +cp -f "$WINDOWS_REPO/scripts/visualize_minimal_ppo.py" "$NATIVE_REPO/scripts/visualize_minimal_ppo.py" + +cd "$NATIVE_REPO" +source "$VENV_DIR/bin/activate" +export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}" + +if ! cmp -s "$WINDOWS_REPO/pufferlib/ocean/drive/drive.h" "$NATIVE_REPO/pufferlib/ocean/drive/drive.h"; then + echo "Native renderer source changed; rebuilding Ocean binding..." + cp -f "$WINDOWS_REPO/pufferlib/ocean/drive/drive.h" "$NATIVE_REPO/pufferlib/ocean/drive/drive.h" + NO_TRAIN=1 python setup.py build_ext --inplace --force +fi + +python scripts/visualize_minimal_ppo.py "$@" + +mkdir -p "$WINDOWS_REPO/training_visualizations" +cp -f training_visualizations/*.mp4 "$WINDOWS_REPO/training_visualizations/" +cp -f training_visualizations/*.json "$WINDOWS_REPO/training_visualizations/" + +echo "Copied outputs to $WINDOWS_REPO/training_visualizations" diff --git a/scripts/wsl_native_3d_setup.sh b/scripts/wsl_native_3d_setup.sh new file mode 100644 index 0000000000..bac5f1aa3a --- /dev/null +++ b/scripts/wsl_native_3d_setup.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd "$(dirname "$0")/.." +ORIGINAL_REPO_DIR="${PUFFERDRIVE_NATIVE_ORIGIN:-}" + +echo "==> Installing Linux build and render dependencies" +sudo apt update +sudo apt install -y \ + build-essential \ + clang \ + curl \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libx11-dev \ + libxcursor-dev \ + libxi-dev \ + libxinerama-dev \ + libxrandr-dev \ + libxxf86vm-dev \ + mesa-utils \ + python3-dev \ + python3-pip \ + python3-venv \ + rsync \ + xvfb + +if [[ "$PWD" == /mnt/* && -z "$ORIGINAL_REPO_DIR" ]]; then + NATIVE_REPO_DIR="${HOME}/PufferDrive-native" + echo "==> Repository is on a Windows mount; mirroring to ${NATIVE_REPO_DIR} for native Linux build" + mkdir -p "$NATIVE_REPO_DIR" + rsync -a --delete \ + --exclude ".git/" \ + --exclude ".venv/" \ + --exclude ".venv-wsl/" \ + --exclude ".venv-wsl-build/" \ + --exclude ".venv-wsl-native/" \ + --exclude "build/" \ + --exclude "dist/" \ + --exclude "*.egg-info/" \ + "$PWD/" "$NATIVE_REPO_DIR/" + echo "==> Restarting setup from Linux filesystem" + PUFFERDRIVE_NATIVE_ORIGIN="$PWD" bash "$NATIVE_REPO_DIR/scripts/wsl_native_3d_setup.sh" "$@" + exit $? +fi + +if [[ ! -d resources ]]; then + echo "==> Creating Linux resources symlink" + rm -f resources + ln -s pufferlib/resources resources +fi + +echo "==> Creating WSL Python environment" +VENV_DIR="${HOME}/.venvs/pufferdrive-wsl" +rm -rf "$VENV_DIR" +mkdir -p "$(dirname "$VENV_DIR")" +python3 -m venv --copies "$VENV_DIR" +source "$VENV_DIR/bin/activate" +python -m pip install --upgrade pip setuptools wheel + +echo "==> Installing Python dependencies" +python -m pip install \ + "numpy<2" \ + Cython \ + "gym==0.23" \ + "gymnasium==0.29.1" \ + "pettingzoo==1.24.1" \ + "shimmy[gym-v21]" \ + tqdm \ + imageio \ + imageio-ffmpeg \ + "torch" + +echo "==> Building PufferDrive native Ocean binding" +export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}" +NO_TRAIN=1 python setup.py build_ext --inplace --force + +echo "==> Done. Native build is ready in ${PWD}" +echo "Next:" +echo " bash scripts/prepare_waymo_maps_wsl.sh /path/to/scenario.json" +echo " bash scripts/run_minimal_ppo_wsl.sh --map-dir resources/drive/binaries/training --num-maps 1" From 111adb43d33aadd755662047325a59d724d8317b Mon Sep 17 00:00:00 2001 From: Haolin Date: Mon, 15 Jun 2026 00:02:47 -0400 Subject: [PATCH 2/2] Document workflow and add scenario visualization --- README.md | 289 ++++++++++++-------------- scripts/render_waymo_follow_3d.py | 267 ++++++++++++++++++++++++ scripts/visualize_waymo_json.py | 331 ++++++++++++++++++++++++++++++ 3 files changed, 732 insertions(+), 155 deletions(-) create mode 100644 scripts/render_waymo_follow_3d.py create mode 100644 scripts/visualize_waymo_json.py diff --git a/README.md b/README.md index 00aae40fd7..53c2646072 100644 --- a/README.md +++ b/README.md @@ -1,224 +1,203 @@ -# PufferDrive +# PufferDrive Minimal PPO on Windows and WSL -[![Unit Tests](https://github.com/Emerge-Lab/PufferDrive/actions/workflows/utest.yml/badge.svg)](https://github.com/Emerge-Lab/PufferDrive/actions/workflows/utest.yml) +This branch adds a small reinforcement learning workflow on top of the +original [PufferDrive](https://github.com/Emerge-Lab/PufferDrive) project. - +The goal is simple: -**PufferDrive is a fast and friendly driving simulator to train and test RL-based models.** +1. Export Waymo Motion Dataset scenarios as JSON. +2. Convert the JSON files to PufferDrive map binaries. +3. Train a small continuous-action PPO policy. +4. Render the trained policy as an MP4 video. -
-
-
-
-
-
-
-
-
-
+This is a working training example, not a finished autonomous driving model. +The default short run is mainly useful for checking that the complete pipeline +works. ---- +## Why WSL is used -**Docs**: https://emerge-lab.github.io/PufferDrive +The native PufferDrive renderer uses Linux libraries and Raylib. On Windows, +the easiest setup is: ---- +- Keep the Git repository on the Windows drive. +- Run compilation, training, and native 3D rendering inside WSL. +- Copy checkpoints and videos back to the Windows repository. -### See our 2.0 release video +The setup script creates a Linux copy at: - - PufferDrive 2.0 - - -## Installation - -Clone the repo -```bash -https://github.com/Emerge-Lab/PufferDrive.git -``` - -Make a venv (`uv venv`), activate the venv -``` -source .venv/bin/activate +```text +~/PufferDrive-native ``` -Inside the venv, install the dependencies -``` -uv pip install -e . -``` - -Compile the C code -``` -python setup.py build_ext --inplace --force -``` -Run this while your virtual environment is active so the extension is built against the right interpreter. +Your original Windows files remain under: -To test your setup, you can run +```text +/mnt/c/Users//Desktop/PufferDrive ``` -puffer train puffer_drive -``` -See also the [puffer docs](https://puffer.ai/docs.html). +## 1. Clone this branch -## Quick start +From PowerShell: -Start a training run -``` -puffer train puffer_drive +```powershell +git clone --branch codex/minimal-ppo-wsl https://github.com/HC-Seaple/PufferDrive.git +cd PufferDrive ``` -### Minimal PPO on Windows/WSL - -For an inspectable continuous-action PPO implementation, Waymo JSON map -preparation, and native third-person checkpoint rendering, see -[Minimal PPO training](docs/src/minimal-ppo.md). +If Ubuntu is not installed in WSL, open PowerShell as Administrator: -## Dataset +```powershell +powershell -ExecutionPolicy Bypass -File scripts/install_wsl_admin.ps1 +``` -
-Downloading and using data +Restart Windows if requested, then open Ubuntu or run `wsl`. -### Data preparation +## 2. Build the native environment -To train with PufferDrive, you need to convert JSON files to map binaries. Run the following command with the path to your data folder: +From WSL: ```bash -python pufferlib/ocean/drive/drive.py +cd /mnt/c/Users//Desktop/PufferDrive +bash scripts/wsl_native_3d_setup.sh ``` -### Downloading Waymo Data - -You can download the WOMD data from Hugging Face in two versions: - -- **Mini dataset**: [GPUDrive_mini](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) contains 1,000 training files and 300 test/validation files -- **Medium dataset**: [10,000 files from the training dataset](https://huggingface.co/datasets/daphne-cornelisse/pufferdrive_train) -- **Large dataset**: [GPUDrive](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) contains 100,000 unique scenes +This installs the Linux dependencies, creates a Python environment, copies the +repository to the Linux filesystem, and builds the native extension. -**Note**: Replace 'GPUDrive_mini' with 'GPUDrive' in your download commands if you want to use the full dataset. +The warning messages from the C compiler about ignored return values are not +fatal. The setup is successful when it prints: -### Additional Data Sources - -For more training data compatible with PufferDrive, see [ScenarioMax](https://github.com/valeoai/ScenarioMax). The GPUDrive data format is fully compatible with PufferDrive. -
- - -## Visualizer - -
-Dependencies and usage +```text +Done. Native build is ready +``` -## Local rendering +## 3. Prepare Waymo scenarios -To launch an interactive renderer, first build: -``` -bash scripts/build_ocean.sh drive local -``` +Put one or more exported scenario JSON files in the Windows repository. Then +run from WSL: -then launch: ```bash -./drive +cd /mnt/c/Users//Desktop/PufferDrive +bash scripts/prepare_waymo_maps_wsl.sh \ + ./scenario_a.json \ + ./scenario_b.json ``` -this will run `demo()` with an existing model checkpoint. - -## Headless server setup -Run the Raylib visualizer on a headless server and export as .mp4. This will rollout the pre-trained policy in the env. +The binary maps are written to the Linux-native repository: -### Install dependencies - -```bash -sudo apt update -sudo apt install ffmpeg xvfb +```text +~/PufferDrive-native/resources/drive/binaries/training ``` -For HPC (There are no root privileges), so install into the conda environment -```bash -conda install -c conda-forge xorg-x11-server-xvfb-cos6-x86_64 -conda install -c conda-forge ffmpeg +The files must be contiguous: + +```text +map_000.bin +map_001.bin +map_002.bin ``` -- `ffmpeg`: Video processing and conversion -- `xvfb`: Virtual display for headless environments +## 4. Run a small training test -### Build and run +From the Windows-mounted repository in WSL: -1. Build the application: ```bash -bash scripts/build_ocean.sh visualize local +bash scripts/run_minimal_ppo_wsl.sh \ + --map-dir resources/drive/binaries/training \ + --num-maps 2 \ + --num-envs 1 \ + --total-timesteps 10000 \ + --rollout-steps 128 \ + --minibatch-size 128 ``` -2. Run with virtual display: -```bash -xvfb-run -s "-screen 0 1280x720x24" ./visualize -``` +Change `--num-maps` to match the number of prepared map files. -The `-s` flag sets up a virtual screen at 1280x720 resolution with 24-bit color depth. +Checkpoints are saved under: ---- +```text +~/PufferDrive-native/checkpoints/minimal_ppo +``` -> To force a rebuild, you can delete the cached compiled executable binary using `rm ./visualize`. +The training is running correctly when the step count increases, the losses +remain finite, and `.pt` checkpoint files are created. ---- +## 5. Render the trained policy -
+```bash +bash scripts/visualize_minimal_ppo_wsl.sh \ + --map-dir resources/drive/binaries/training \ + --num-maps 2 \ + --episode-length 91 \ + --draw-traces +``` +The script renders with the native Raylib 3D renderer and copies the MP4 and +JSON metrics to the Windows folder: -## Benchmarks +```text +training_visualizations/ +``` -### Distributional realism +## Visualize a Waymo JSON without training -We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See documentation [here](https://emerge-lab.github.io/PufferDrive/wosac/). +The following commands use the Windows virtual environment. -WOSAC evaluation with random policy: -```bash -puffer eval puffer_drive --eval.wosac-realism-eval True -``` +Create a top-down replay: -WOSAC evaluation with your checkpoint (must be .pt file): -```bash -puffer eval puffer_drive --eval.wosac-realism-eval True --load-model-path .pt +```powershell +.\.venv\Scripts\python.exe scripts\visualize_waymo_json.py scenario.json ``` -### Human-compatibility +Create a simple 3D chase-camera replay for a selected Waymo track: -You may be interested in how compatible your agent is with human partners. For this purpose, we support an eval where your policy only controls the self-driving car (SDC). The rest of the agents in the scene are stepped using the logs. While it is not a perfect eval since the human partners here are static, it will still give you a sense of how closely aligned your agent's behavior is to how people drive. You can run it like this: -```bash -puffer eval puffer_drive --eval.human-replay-eval True --load-model-path .pt +```powershell +.\.venv\Scripts\python.exe scripts\render_waymo_follow_3d.py scenario.json ` + --track-index 90 ` + --start-frame 0 ` + --end-frame 60 ``` -## Development +The lightweight chase-camera renderer uses boxes and map lines. It is useful +for quickly checking a recorded trajectory without compiling Raylib. The +native checkpoint renderer uses the PufferDrive car models and full native +rendering. -
Documentation and browser demo +Outputs are written to: -**Docs** - -A browsable documentation site now lives under `docs/` and is configured with mkbooks. To preview locally: -``` -brew install mdbook -mdbook serve --open docs +```text +visualizations/ ``` -Open the served URL to see a local version of the docs. -**Interactive demo** +## Main files -To edit the browser demo, follow these steps: -- Download [emscripten](https://github.com/emscripten-core/emscripten) -- emscripten install latest -- Activate: `source emsdk/emsdk_env.sh` -- Run `bash scripts/build_ocean.sh drive web` -- This generates a number of `game*` files, move them to `assets/` to include them on the webpage +| File | Purpose | +| --- | --- | +| `scripts/minimal_ppo_train.py` | Small PPO actor-critic training loop | +| `scripts/parallel_data_collect.py` | Original rollout pattern used by the trainer | +| `scripts/prepare_waymo_maps.py` | Converts Waymo JSON to PufferDrive maps | +| `scripts/run_minimal_ppo_wsl.sh` | Starts training in the Linux-native copy | +| `scripts/visualize_minimal_ppo.py` | Runs a checkpoint and records native 3D video | +| `scripts/visualize_waymo_json.py` | Creates a top-down JSON replay | +| `scripts/render_waymo_follow_3d.py` | Creates a lightweight 3D chase replay | +| `docs/src/minimal-ppo.md` | More detail about PPO and command options | -
+## Current limitation +A 10,000-step run only verifies the architecture. It is usually not enough to +learn good driving. Early policies may reverse, steer poorly, or fail to reach +the goal. -## Citation +For a useful model, use more scenarios and training steps, then evaluate on +scenarios that were not used for training. Useful measurements include: -If you use PufferDrive in your research, please cite: -```bibtex -@software{pufferdrive2025github, - author = {Daphne Cornelisse* and Spencer Cheng* and Pragnay Mandavilli and Julian Hunt and Kevin Joseph and Waƫl Doulazmi and Valentin Charraut and Aditya Gupta and Joseph Suarez and Eugene Vinitsky}, - title = {{PufferDrive}: A Fast and Friendly Driving Simulator for Training and Evaluating {RL} Agents}, - url = {https://github.com/Emerge-Lab/PufferDrive}, - version = {2.0.0}, - year = {2025}, -} -``` +- goal completion rate +- collision rate +- off-road rate +- reverse-motion frequency +- average episode return + +## Upstream project + +PufferDrive is developed by Emerge Lab. The original documentation is +available at . diff --git a/scripts/render_waymo_follow_3d.py b/scripts/render_waymo_follow_3d.py new file mode 100644 index 0000000000..23460cdf04 --- /dev/null +++ b/scripts/render_waymo_follow_3d.py @@ -0,0 +1,267 @@ +import argparse +import json +import math +from pathlib import Path + +import imageio.v2 as imageio +import numpy as np +from PIL import Image, ImageDraw, ImageFont + + +ROAD_COLORS = { + "road_edge": (222, 226, 229), + "road_line": (244, 198, 63), + "lane": (91, 98, 105), + "crosswalk": (201, 205, 209), + "speed_bump": (218, 135, 63), + "driveway": (112, 124, 105), +} + + +def normalize(vector): + length = np.linalg.norm(vector) + return vector / max(length, 1e-8) + + +def feature_geometry(feature): + for feature_type in ROAD_COLORS: + if feature_type not in feature: + continue + data = feature[feature_type] + points = data.get("polyline") or data.get("polygon") or [] + if len(points) >= 2: + return feature_type, points + return None, None + + +class ChaseCamera: + def __init__(self, state, width, height): + heading = float(state.get("heading", 0.0)) + forward_xy = np.array([math.cos(heading), math.sin(heading), 0.0]) + target = np.array([state["center_x"], state["center_y"], 1.0]) + self.position = target - forward_xy * 15.0 + np.array([0.0, 0.0, 8.0]) + look_at = target + forward_xy * 9.0 + np.array([0.0, 0.0, 0.5]) + self.forward = normalize(look_at - self.position) + self.right = normalize(np.cross(self.forward, np.array([0.0, 0.0, 1.0]))) + self.up = normalize(np.cross(self.right, self.forward)) + self.width = width + self.height = height + self.focal = width / (2 * math.tan(math.radians(68) / 2)) + + def project(self, point): + relative = np.asarray(point, dtype=float) - self.position + depth = float(np.dot(relative, self.forward)) + if depth < 0.8: + return None + x = float(np.dot(relative, self.right)) + y = float(np.dot(relative, self.up)) + return ( + self.width / 2 + self.focal * x / depth, + self.height * 0.53 - self.focal * y / depth, + depth, + ) + + +def car_vertices(state): + heading = float(state.get("heading", 0.0)) + length = float(state.get("length", 4.5)) + width = float(state.get("width", 1.9)) + height = float(state.get("height", 1.6)) + center = np.array([state["center_x"], state["center_y"]]) + forward = np.array([math.cos(heading), math.sin(heading)]) + side = np.array([-forward[1], forward[0]]) + footprint = [ + center + forward * length / 2 + side * width / 2, + center + forward * length / 2 - side * width / 2, + center - forward * length / 2 - side * width / 2, + center - forward * length / 2 + side * width / 2, + ] + return [ + np.array([point[0], point[1], z]) + for z in (0.08, height) + for point in footprint + ] + + +def shade(color, factor): + return tuple(max(0, min(255, int(channel * factor))) for channel in color) + + +def car_faces(state, color, camera): + vertices = car_vertices(state) + faces = [ + ([4, 5, 6, 7], shade(color, 1.12)), + ([0, 1, 5, 4], shade(color, 0.92)), + ([1, 2, 6, 5], shade(color, 0.72)), + ([2, 3, 7, 6], shade(color, 0.82)), + ([3, 0, 4, 7], shade(color, 0.68)), + ] + projected_faces = [] + for indices, face_color in faces: + projected = [camera.project(vertices[index]) for index in indices] + if any(point is None for point in projected): + continue + depth = sum(point[2] for point in projected) / len(projected) + projected_faces.append((depth, [(point[0], point[1]) for point in projected], face_color)) + return projected_faces + + +def draw_ground(draw, camera, target_state): + heading = float(target_state.get("heading", 0.0)) + forward = np.array([math.cos(heading), math.sin(heading)]) + side = np.array([-forward[1], forward[0]]) + center = np.array([target_state["center_x"], target_state["center_y"]]) + + for lateral in range(-40, 41, 5): + points = [ + camera.project((*point, 0.0)) + for point in (center + side * lateral + forward * distance for distance in range(-10, 91, 5)) + ] + points = [(point[0], point[1]) for point in points if point] + if len(points) >= 2: + draw.line(points, fill=(48, 54, 58), width=1) + + for distance in range(-5, 91, 5): + points = [ + camera.project((*point, 0.0)) + for point in (center + forward * distance + side * lateral for lateral in range(-40, 41, 4)) + ] + points = [(point[0], point[1]) for point in points if point] + if len(points) >= 2: + draw.line(points, fill=(48, 54, 58), width=1) + + +def draw_map(draw, camera, map_features, target_state): + target_xy = np.array([target_state["center_x"], target_state["center_y"]]) + for feature_type, points in map_features: + visible = [] + for point in points: + point_xy = np.array([point["x"], point["y"]]) + if np.linalg.norm(point_xy - target_xy) > 105: + if len(visible) >= 2: + draw.line(visible, fill=ROAD_COLORS[feature_type], width=2) + visible = [] + continue + projected = camera.project((point["x"], point["y"], 0.05)) + if projected: + visible.append((projected[0], projected[1])) + if len(visible) >= 2: + width = 3 if feature_type in {"road_edge", "road_line"} else 1 + draw.line(visible, fill=ROAD_COLORS[feature_type], width=width, joint="curve") + + +def render_frame(data, map_features, target_track_index, frame, width, height): + target_state = data["tracks"][target_track_index]["states"][frame] + camera = ChaseCamera(target_state, width, height) + image = Image.new("RGB", (width, height), (18, 23, 27)) + draw = ImageDraw.Draw(image) + + horizon = int(height * 0.48) + draw.rectangle((0, horizon, width, height), fill=(31, 36, 39)) + draw_ground(draw, camera, target_state) + draw_map(draw, camera, map_features, target_state) + + all_faces = [] + target_xy = np.array([target_state["center_x"], target_state["center_y"]]) + for track_index, track in enumerate(data["tracks"]): + states = track.get("states", []) + if frame >= len(states): + continue + state = states[frame] + if not state.get("valid"): + continue + distance = np.linalg.norm( + np.array([state["center_x"], state["center_y"]]) - target_xy + ) + if distance > 75: + continue + if track_index == target_track_index: + color = (0, 190, 242) + elif track.get("object_type") == "TYPE_PEDESTRIAN": + color = (178, 102, 220) + else: + color = (222, 83, 58) + all_faces.extend(car_faces(state, color, camera)) + + for _, polygon, color in sorted(all_faces, key=lambda item: item[0], reverse=True): + draw.polygon(polygon, fill=color, outline=(235, 239, 242)) + + timestamp = data.get("timestamps_seconds", []) + seconds = timestamp[frame] if frame < len(timestamp) else frame / 10 + draw.rounded_rectangle((18, 18, 440, 73), radius=6, fill=(9, 13, 16)) + draw.text( + (32, 29), + f"Roundabout chase | track {target_track_index} | t={seconds:.1f}s", + fill=(238, 242, 244), + font=ImageFont.load_default(), + ) + draw.text( + (32, 50), + "Waymo recorded trajectory", + fill=(0, 190, 242), + font=ImageFont.load_default(), + ) + return image + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("json_file", type=Path) + parser.add_argument("--track-index", type=int, default=90) + parser.add_argument("--start-frame", type=int, default=0) + parser.add_argument("--end-frame", type=int, default=60) + parser.add_argument("--fps", type=int, default=10) + parser.add_argument("--width", type=int, default=1280) + parser.add_argument("--height", type=int, default=720) + parser.add_argument("--output", type=Path) + args = parser.parse_args() + + with args.json_file.open("r", encoding="utf-8") as file: + data = json.load(file) + + map_features = [] + for feature in data.get("map_features", []): + feature_type, points = feature_geometry(feature) + if points: + map_features.append((feature_type, points)) + + scenario_id = data.get("scenario_id", args.json_file.stem) + output = args.output or Path("visualizations") / f"{scenario_id}_roundabout_follow_3d.mp4" + output.parent.mkdir(parents=True, exist_ok=True) + preview = output.with_suffix(".png") + + frames = [] + skipped_frames = [] + for frame_index in range(args.start_frame, args.end_frame + 1): + target_states = data["tracks"][args.track_index].get("states", []) + if frame_index >= len(target_states): + skipped_frames.append(frame_index) + continue + target_state = target_states[frame_index] + if not target_state.get("valid") or "center_x" not in target_state: + skipped_frames.append(frame_index) + continue + frame = render_frame( + data, + map_features, + args.track_index, + frame_index, + args.width, + args.height, + ) + if frame_index == args.start_frame: + frame.save(preview) + frames.append(np.asarray(frame)) + if frame_index % 10 == 0: + print(f"rendered frame {frame_index:02d}/{args.end_frame}") + + imageio.mimsave(output, frames, fps=args.fps, macro_block_size=16) + print(f"video: {output}") + print(f"preview: {preview}") + print(f"frames: {len(frames)}, duration: {len(frames) / args.fps:.1f}s") + if skipped_frames: + print(f"skipped invalid source frames: {skipped_frames}") + + +if __name__ == "__main__": + main() diff --git a/scripts/visualize_waymo_json.py b/scripts/visualize_waymo_json.py new file mode 100644 index 0000000000..bcbadb431e --- /dev/null +++ b/scripts/visualize_waymo_json.py @@ -0,0 +1,331 @@ +import argparse +import json +from pathlib import Path + +import imageio.v2 as imageio +import numpy as np +from PIL import Image, ImageDraw, ImageFont + + +ROAD_STYLES = { + "lane": ((150, 156, 162), 1), + "road_edge": ((55, 64, 72), 2), + "road_line": ((206, 165, 48), 2), + "crosswalk": ((124, 111, 176), 2), + "speed_bump": ((214, 111, 44), 2), + "stop_sign": ((191, 63, 47), 4), + "driveway": ((123, 139, 92), 1), +} + +OBJECT_TYPE_MAP = { + "TYPE_VEHICLE": "vehicle", + "TYPE_PEDESTRIAN": "pedestrian", + "TYPE_CYCLIST": "cyclist", +} + + +def point_dict(point): + return { + "x": float(point.get("x", point.get("center_x", 0.0))), + "y": float(point.get("y", point.get("center_y", 0.0))), + "z": float(point.get("z", point.get("center_z", 0.0))), + } + + +def convert_raw_waymo(data): + if "objects" in data and "roads" in data: + return data + if "tracks" not in data or "map_features" not in data: + raise ValueError("Unsupported JSON schema: expected objects/roads or tracks/map_features") + + objects = [] + for track in data["tracks"]: + states = track.get("states", []) + valid_states = [state for state in states if state.get("valid")] + dimensions = valid_states[0] if valid_states else (states[0] if states else {}) + objects.append( + { + "id": track.get("id"), + "type": OBJECT_TYPE_MAP.get(track.get("object_type"), "vehicle"), + "position": [point_dict(state) for state in states], + "heading": [float(state.get("heading", 0.0)) for state in states], + "velocity": [ + { + "x": float(state.get("velocity_x", 0.0)), + "y": float(state.get("velocity_y", 0.0)), + "z": 0.0, + } + for state in states + ], + "valid": [bool(state.get("valid")) for state in states], + "length": float(dimensions.get("length", 4.0)), + "width": float(dimensions.get("width", 2.0)), + "height": float(dimensions.get("height", 1.5)), + "goalPosition": point_dict(valid_states[-1]) if valid_states else {"x": 0, "y": 0, "z": 0}, + "mark_as_expert": False, + } + ) + + roads = [] + for feature_index, feature in enumerate(data["map_features"]): + feature_id = feature.get("id", feature_index) + for road_type in ROAD_STYLES: + if road_type not in feature: + continue + feature_data = feature[road_type] + geometry = feature_data.get("polyline") or feature_data.get("polygon") or [] + if not geometry and feature_data.get("position"): + geometry = [feature_data["position"]] + roads.append( + { + "id": feature_id, + "map_element_id": feature_id, + "type": road_type, + "geometry": [point_dict(point) for point in geometry], + } + ) + break + + return { + "name": data.get("scenario_id", "waymo_scenario"), + "scenario_id": data.get("scenario_id"), + "objects": objects, + "roads": roads, + "timestamps_seconds": data.get("timestamps_seconds", []), + "metadata": { + "sdc_track_index": data.get("sdc_track_index"), + "tracks_to_predict": data.get("tracks_to_predict", []), + }, + } + + +def valid_at(obj, frame): + valid = obj.get("valid") + return valid is None or frame < len(valid) and bool(valid[frame]) + + +def point_at(obj, frame): + pos = obj.get("position", []) + if frame >= len(pos): + return None + return pos[frame] + + +def bounds_for(data): + xs = [] + ys = [] + for road in data.get("roads", []): + for point in road.get("geometry", []): + xs.append(point["x"]) + ys.append(point["y"]) + + for obj in data.get("objects", []): + for frame, point in enumerate(obj.get("position", [])): + if valid_at(obj, frame): + xs.append(point["x"]) + ys.append(point["y"]) + + if not xs or not ys: + raise ValueError("Scenario has no drawable x/y coordinates") + + return min(xs), max(xs), min(ys), max(ys) + + +def make_transform(bounds, width, height, pad): + min_x, max_x, min_y, max_y = bounds + world_w = max(max_x - min_x, 1.0) + world_h = max(max_y - min_y, 1.0) + scale = min((width - 2 * pad) / world_w, (height - 2 * pad) / world_h) + extra_x = (width - 2 * pad - world_w * scale) / 2 + extra_y = (height - 2 * pad - world_h * scale) / 2 + + def transform(x, y): + px = pad + extra_x + (x - min_x) * scale + py = height - pad - extra_y - (y - min_y) * scale + return px, py + + return transform, scale + + +def object_color(data, index, obj): + metadata = data.get("metadata", {}) + predicted = {track.get("track_index") for track in metadata.get("tracks_to_predict", [])} + if index == metadata.get("sdc_track_index"): + return 25, 103, 210 + if index in predicted or obj.get("mark_as_expert"): + return 191, 63, 47 + if obj.get("type") in {"pedestrian", "cyclist"}: + return 141, 90, 194 + return 44, 139, 87 + + +def draw_polyline(draw, points, transform, fill, width): + coords = [] + for point in points: + x = point.get("x") + y = point.get("y") + if isinstance(x, (int, float)) and isinstance(y, (int, float)): + coords.append(transform(x, y)) + if len(coords) >= 2: + draw.line(coords, fill=fill, width=width, joint="curve") + + +def draw_object(draw, data, obj, index, frame, transform, scale, trails, highlight_index=None): + point = point_at(obj, frame) + if not point or not valid_at(obj, frame): + return + + highlighted = index == highlight_index + color = (0, 173, 239) if highlighted else object_color(data, index, obj) + if trails: + trail = [] + for i in range(frame + 1): + trail_point = point_at(obj, i) + if trail_point and valid_at(obj, i): + trail.append(trail_point) + if len(trail) > 1: + draw_polyline(draw, trail, transform, color + ((220 if highlighted else 95),), 4 if highlighted else 2) + + cx, cy = transform(point["x"], point["y"]) + length = max(5, obj.get("length", 4.0) * scale) + width = max(3, obj.get("width", 2.0) * scale) + heading_values = obj.get("heading", []) + heading = heading_values[frame] if frame < len(heading_values) else 0 + + forward = np.array([np.cos(-heading), np.sin(-heading)]) + side = np.array([-forward[1], forward[0]]) + center = np.array([cx, cy]) + corners = [ + center + forward * length / 2 + side * width / 2, + center + forward * length / 2 - side * width / 2, + center - forward * length / 2 - side * width / 2, + center - forward * length / 2 + side * width / 2, + ] + polygon = [tuple(corner) for corner in corners] + if highlighted: + halo = max(8, int(max(length, width) * 0.8)) + draw.ellipse((cx - halo, cy - halo, cx + halo, cy + halo), outline=(0, 173, 239, 210), width=3) + draw.polygon(polygon, fill=color, outline=(255, 255, 255), width=2 if highlighted else 1) + + nose = [ + tuple(center + forward * length / 2), + tuple(center + forward * length * 0.22 + side * width * 0.28), + tuple(center + forward * length * 0.22 - side * width * 0.28), + ] + draw.polygon(nose, fill=(245, 247, 249)) + + +def render_frame(data, frame, transform, scale, width, height, trails, highlight_index=None): + image = Image.new("RGB", (width, height), (235, 231, 223)) + overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0)) + draw = ImageDraw.Draw(overlay, "RGBA") + + for road in data.get("roads", []): + color, line_width = ROAD_STYLES.get(road.get("type"), ((110, 110, 110), 1)) + alpha = 125 if road.get("type") == "lane" else 185 + draw_polyline(draw, road.get("geometry", []), transform, color + (alpha,), line_width) + + for index, obj in enumerate(data.get("objects", [])): + draw_object(draw, data, obj, index, frame, transform, scale, trails, highlight_index) + + timestamp_values = data.get("timestamps_seconds", []) + timestamp = timestamp_values[frame] if frame < len(timestamp_values) else frame / 10 + label = f"{data.get('scenario_id', data.get('name', 'scenario'))} | frame {frame:02d} | t={timestamp:.1f}s" + if highlight_index is not None and highlight_index < len(data.get("objects", [])): + target = data["objects"][highlight_index] + label += f" | target track {highlight_index}, id {target.get('id')}" + ImageDraw.Draw(overlay).rounded_rectangle((10, 8, min(width - 10, 470), 34), radius=4, fill=(245, 247, 249, 220)) + ImageDraw.Draw(overlay).text( + (16, 14), + label, + fill=(29, 37, 44, 230), + font=ImageFont.load_default(), + ) + return Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB") + + +def render_scenario( + path, + output_dir, + width, + height, + fps, + trails, + focus_center=None, + focus_radius=None, + highlight_index=None, + start_frame=0, + end_frame=None, + output_suffix="", +): + with path.open("r", encoding="utf-8") as f: + data = convert_raw_waymo(json.load(f)) + + max_frame = max(len(obj.get("position", [])) for obj in data.get("objects", [])) - 1 + end_frame = max_frame if end_frame is None else min(end_frame, max_frame) + start_frame = max(0, min(start_frame, end_frame)) + if focus_center and focus_radius: + center_x, center_y = focus_center + bounds = ( + center_x - focus_radius, + center_x + focus_radius, + center_y - focus_radius, + center_y + focus_radius, + ) + else: + bounds = bounds_for(data) + transform, scale = make_transform(bounds, width, height, pad=40) + scenario_id = str(data.get("scenario_id") or path.stem) + output_dir.mkdir(parents=True, exist_ok=True) + mp4_path = output_dir / f"{scenario_id}{output_suffix}.mp4" + png_path = output_dir / f"{scenario_id}{output_suffix}_frame{start_frame:03d}.png" + + frames = [] + for frame in range(start_frame, end_frame + 1): + image = render_frame(data, frame, transform, scale, width, height, trails, highlight_index) + if frame == start_frame: + image.save(png_path) + frames.append(np.asarray(image)) + + imageio.mimsave(mp4_path, frames, fps=fps) + return mp4_path, png_path, len(frames) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("json_files", nargs="+", type=Path) + parser.add_argument("--output-dir", type=Path, default=Path("visualizations")) + parser.add_argument("--width", type=int, default=1280) + parser.add_argument("--height", type=int, default=800) + parser.add_argument("--fps", type=int, default=10) + parser.add_argument("--no-trails", action="store_true") + parser.add_argument("--focus-center", nargs=2, type=float, metavar=("X", "Y")) + parser.add_argument("--focus-radius", type=float) + parser.add_argument("--highlight-track", type=int) + parser.add_argument("--start-frame", type=int, default=0) + parser.add_argument("--end-frame", type=int) + parser.add_argument("--output-suffix", default="") + args = parser.parse_args() + + for path in args.json_files: + mp4_path, png_path, frame_count = render_scenario( + path=path, + output_dir=args.output_dir, + width=args.width, + height=args.height, + fps=args.fps, + trails=not args.no_trails, + focus_center=args.focus_center, + focus_radius=args.focus_radius, + highlight_index=args.highlight_track, + start_frame=args.start_frame, + end_frame=args.end_frame, + output_suffix=args.output_suffix, + ) + print(f"Rendered {path.name}: {frame_count} frames") + print(f" video: {mp4_path}") + print(f" preview: {png_path}") + + +if __name__ == "__main__": + main()