From 96b306af08864f79a13d6e3028fb2ab9aded850c Mon Sep 17 00:00:00 2001
From: Haolin <nyx7ck@virginia.edu>
Date: Sat, 13 Jun 2026 22:27:37 -0400
Subject: [PATCH 1/2] Add reproducible minimal PPO WSL workflow

---
 .gitignore                           |  10 +
 README.md                            |   6 +
 docs/src/minimal-ppo.md              | 159 +++++++++++
 pufferlib/ocean/drive/drive.h        |  22 +-
 scripts/install_wsl_admin.ps1        |  25 ++
 scripts/minimal_ppo_train.py         | 398 +++++++++++++++++++++++++++
 scripts/parallel_data_collect.py     | 118 ++++++++
 scripts/prepare_waymo_maps.py        |  78 ++++++
 scripts/prepare_waymo_maps_wsl.sh    |  36 +++
 scripts/run_minimal_ppo_wsl.sh       |  26 ++
 scripts/visualize_minimal_ppo.py     | 244 ++++++++++++++++
 scripts/visualize_minimal_ppo_wsl.sh |  32 +++
 scripts/wsl_native_3d_setup.sh       |  82 ++++++
 13 files changed, 1235 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/minimal-ppo.md
 create mode 100644 scripts/install_wsl_admin.ps1
 create mode 100644 scripts/minimal_ppo_train.py
 create mode 100644 scripts/parallel_data_collect.py
 create mode 100644 scripts/prepare_waymo_maps.py
 create mode 100644 scripts/prepare_waymo_maps_wsl.sh
 create mode 100644 scripts/run_minimal_ppo_wsl.sh
 create mode 100644 scripts/visualize_minimal_ppo.py
 create mode 100644 scripts/visualize_minimal_ppo_wsl.sh
 create mode 100644 scripts/wsl_native_3d_setup.sh
diff --git a/.gitignore b/.gitignore
index 2bd9405687..baa8a00f28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -190,3 +190,13 @@ pufferlib/resources/drive/output*.gif
 emsdk/
 docs/book/*
 !docs/book/assets/
+
+# Local PufferDrive RL experiments
+/native_3d_renders/
+/training_visualizations/
+/visualizations/
+/outputs/
+/pufferdrive_headless_demo.mp4
+/tfrecord-*.json
+/waymo_viewer_server.*.log
+/pufferlib/resources/drive/waymo_*_json/
diff --git a/README.md b/README.md
index 0b431e7169..00aae40fd7 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,12 @@ Start a training run
 puffer train puffer_drive
 ```
 
+### Minimal PPO on Windows/WSL
+
+For an inspectable continuous-action PPO implementation, Waymo JSON map
+preparation, and native third-person checkpoint rendering, see
+[Minimal PPO training](docs/src/minimal-ppo.md).
+
 ## Dataset
 
 <details>
diff --git a/docs/src/minimal-ppo.md b/docs/src/minimal-ppo.md
new file mode 100644
index 0000000000..700d656387
--- /dev/null
+++ b/docs/src/minimal-ppo.md
@@ -0,0 +1,159 @@
+# Minimal PPO training
+
+`scripts/minimal_ppo_train.py` is a small continuous-action PPO implementation
+built directly on the rollout pattern in `scripts/parallel_data_collect.py`.
+It is intended to make the training architecture explicit before moving to the
+full PuffeRL trainer.
+
+## Architecture
+
+Each controlled agent produces one row of the vectorized batch:
+
+```text
+Drive observations
+      |
+      v
+2-layer MLP encoder
+      |
+      +----> Gaussian actor ----> tanh ----> [acceleration, steering]
+      |
+      +----> critic ------------> V(observation)
+```
+
+The trainer repeatedly:
+
+1. Collects a vectorized rollout from `Drive`.
+2. Stores observations, raw policy actions, log probabilities, rewards,
+   terminal flags, and value predictions.
+3. Computes generalized advantage estimates (GAE).
+4. Updates actor and critic with the PPO clipped objective.
+5. Saves checkpoints and runs a short deterministic evaluation.
+
+## What is included
+
+- `scripts/wsl_native_3d_setup.sh`: installs the WSL/Linux dependencies and
+  builds the native Raylib binding on the Linux filesystem.
+- `scripts/prepare_waymo_maps_wsl.sh`: converts one or more exported WOMD JSON
+  scenarios into contiguous `map_000.bin`, `map_001.bin`, ... files.
+- `scripts/minimal_ppo_train.py`: self-contained continuous-action PPO.
+- `scripts/run_minimal_ppo_wsl.sh`: launches training in the Linux-native copy.
+- `scripts/visualize_minimal_ppo.py`: deterministic evaluation with native 3D
+  rendering.
+- `scripts/visualize_minimal_ppo_wsl.sh`: launches evaluation and copies the
+  video and JSON metrics back to Windows.
+
+Waymo scenarios, checkpoints, and rendered videos are deliberately not stored
+in Git. Users must comply with the Waymo Open Dataset license and provide their
+own exported scenario JSON files.
+
+## Clone and set up
+
+From PowerShell, clone the repository and enter WSL:
+
+```powershell
+git clone https://github.com/<your-account>/PufferDrive.git
+cd PufferDrive
+wsl
+```
+
+If WSL/Ubuntu is not installed, open an elevated PowerShell and run:
+
+```powershell
+powershell -ExecutionPolicy Bypass -File scripts/install_wsl_admin.ps1
+```
+
+From the WSL shell:
+
+```bash
+bash scripts/wsl_native_3d_setup.sh
+```
+
+The setup script mirrors a Windows-mounted checkout to
+`~/PufferDrive-native`, creates `~/.venvs/pufferdrive-wsl`, installs the
+dependencies, and builds the native extension there.
+
+## Prepare Waymo scenarios
+
+Pass one or more exported Waymo Motion Dataset scenario JSON files:
+
+```bash
+bash scripts/prepare_waymo_maps_wsl.sh \
+  ./scenario_a.json \
+  ./scenario_b.json
+```
+
+The generated maps live in the Linux-native repository under
+`resources/drive/binaries/training`. The order of the JSON arguments determines
+the contiguous map indices.
+
+## WSL smoke test
+
+Run this from the Windows-mounted repository:
+
+```bash
+bash scripts/run_minimal_ppo_wsl.sh \
+  --map-dir resources/drive/binaries/training \
+  --num-maps 2 \
+  --num-envs 1 \
+  --total-timesteps 10000 \
+  --rollout-steps 128 \
+  --minibatch-size 128 \
+  --checkpoint-interval 10
+```
+
+This verifies the training loop but is not enough data or experience to learn
+a useful driving policy.
+
+## Visualize a checkpoint
+
+```bash
+bash scripts/visualize_minimal_ppo_wsl.sh \
+  --map-dir resources/drive/binaries/training \
+  --num-maps 2 \
+  --episode-length 91 \
+  --draw-traces
+```
+
+The command writes an MP4 and a JSON metrics file to
+`training_visualizations/` in the Windows checkout.
+
+## Small experiment
+
+After preparing a directory with multiple contiguous maps named
+`map_000.bin`, `map_001.bin`, and so on:
+
+```bash
+bash scripts/run_minimal_ppo_wsl.sh \
+  --map-dir resources/drive/binaries/training \
+  --num-maps 100 \
+  --num-envs 16 \
+  --total-timesteps 1000000 \
+  --rollout-steps 128 \
+  --minibatch-size 512
+```
+
+The default `goal_behavior=1` samples new lane-based goals after reaching the
+current goal. Use `--goal-behavior 2` to train only against each JSON scenario's
+fixed `goalPosition`.
+
+## Reading the logs
+
+- `reward/step`: mean immediate reward in the latest rollout.
+- `episode_return`: mean completed-episode return over the latest 100 episodes.
+- `policy_loss`: PPO actor loss.
+- `value_loss`: critic regression loss.
+- `entropy`: policy exploration; it normally decreases gradually.
+- `kl`: approximate policy change per update.
+- `clipfrac`: fraction of samples clipped by PPO.
+
+The smoke test succeeds when steps advance, losses remain finite, and
+checkpoints are written. A driving model should instead be judged on held-out
+maps using goal completion, collision rate, and off-road rate.
+
+## Current limitation
+
+The minimal trainer validates the architecture, but its current reward is not
+yet sufficient for high-quality driving. The first smoke-test policy may learn
+undesired motion such as reversing. The next iteration should add explicit
+route-progress reward, reverse-motion penalty, and stronger collision/off-road
+costs before scaling training.
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 2a55867120..327972c351 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -1,4 +1,5 @@
 #include <signal.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -2998,6 +2999,22 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers,
     }
 }
 
+static int write_full_frame(int fd, const unsigned char *data, size_t size) {
+    size_t total_written = 0;
+    while (total_written < size) {
+        ssize_t written = write(fd, data + total_written, size - total_written);
+        if (written > 0) {
+            total_written += (size_t)written;
+            continue;
+        }
+        if (written < 0 && errno == EINTR) {
+            continue;
+        }
+        return -1;
+    }
+    return 0;
+}
+
 void c_render(Drive *env, int view_mode, int draw_traces) {
 
     // Create client on first render call
@@ -3095,7 +3112,10 @@ void c_render(Drive *env, int view_mode, int draw_traces) {
 
         unsigned char *screen_data = rlReadScreenPixels((int)client->width, (int)client->height);
         if (screen_data) {
-            write(client->recorder_pipefd[1], screen_data, (int)client->width * (int)client->height * 4);
+            size_t frame_size = (size_t)client->width * (size_t)client->height * 4;
+            if (write_full_frame(client->recorder_pipefd[1], screen_data, frame_size) != 0) {
+                fprintf(stderr, "Failed to write complete render frame to ffmpeg: %s\n", strerror(errno));
+            }
             RL_FREE(screen_data);
         }
     } else { // Pop-up window
diff --git a/scripts/install_wsl_admin.ps1 b/scripts/install_wsl_admin.ps1
new file mode 100644
index 0000000000..b5857ec17f
--- /dev/null
+++ b/scripts/install_wsl_admin.ps1
@@ -0,0 +1,25 @@
+# Run this file from an elevated Windows PowerShell window.
+# It enables WSL2 prerequisites and installs Ubuntu.
+
+$ErrorActionPreference = "Stop"
+
+Write-Host "Enabling Windows Subsystem for Linux..."
+dism.exe /online /enable-feature /featurename:Microsoft-Windows-Subsystem-Linux /all /norestart
+
+Write-Host "Enabling Virtual Machine Platform..."
+dism.exe /online /enable-feature /featurename:VirtualMachinePlatform /all /norestart
+
+Write-Host "Setting WSL 2 as the default version..."
+wsl.exe --set-default-version 2
+
+Write-Host "Installing Ubuntu..."
+wsl.exe --install -d Ubuntu
+
+$repoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..")).Path
+$driveLetter = $repoRoot.Substring(0, 1).ToLowerInvariant()
+$wslRepoRoot = "/mnt/$driveLetter/" + $repoRoot.Substring(3).Replace("\", "/")
+
+Write-Host ""
+Write-Host "If Windows asks for a restart, reboot, open Ubuntu once to create the Linux user, then run:"
+Write-Host "  cd '$wslRepoRoot'"
+Write-Host "  bash scripts/wsl_native_3d_setup.sh"
diff --git a/scripts/minimal_ppo_train.py b/scripts/minimal_ppo_train.py
new file mode 100644
index 0000000000..dba0bf13de
--- /dev/null
+++ b/scripts/minimal_ppo_train.py
@@ -0,0 +1,398 @@
+"""Minimal continuous-action PPO trainer for PufferDrive.
+
+This intentionally keeps the full RL loop in one file so the relationship
+between collection, GAE, PPO updates, and checkpoints is easy to inspect.
+"""
+
+import argparse
+import json
+import os
+import random
+import sys
+import time
+from collections import deque
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+import numpy as np
+import torch
+from torch import nn
+from torch.distributions import Normal
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+os.chdir(REPO_ROOT)
+sys.path.insert(0, str(REPO_ROOT))
+
+
+@dataclass
+class TrainConfig:
+    map_dir: str
+    num_maps: int
+    num_envs: int
+    controlled_agents_per_env: int
+    episode_length: int
+    resample_frequency: int
+    goal_behavior: int
+    goal_target_distance: float
+    total_timesteps: int
+    rollout_steps: int
+    update_epochs: int
+    minibatch_size: int
+    learning_rate: float
+    gamma: float
+    gae_lambda: float
+    clip_coef: float
+    value_coef: float
+    entropy_coef: float
+    max_grad_norm: float
+    hidden_size: int
+    seed: int
+    device: str
+    checkpoint_dir: str
+    checkpoint_interval: int
+    eval_steps: int
+    resume: str | None
+
+
+class ActorCritic(nn.Module):
+    def __init__(self, observation_dim, action_dim, hidden_size):
+        super().__init__()
+        self.encoder = nn.Sequential(
+            nn.Linear(observation_dim, hidden_size),
+            nn.LayerNorm(hidden_size),
+            nn.Tanh(),
+            nn.Linear(hidden_size, hidden_size),
+            nn.Tanh(),
+        )
+        self.actor_mean = nn.Linear(hidden_size, action_dim)
+        self.log_std = nn.Parameter(torch.full((action_dim,), -0.5))
+        self.critic = nn.Linear(hidden_size, 1)
+
+        nn.init.orthogonal_(self.actor_mean.weight, gain=0.01)
+        nn.init.constant_(self.actor_mean.bias, 0)
+        nn.init.orthogonal_(self.critic.weight, gain=1.0)
+        nn.init.constant_(self.critic.bias, 0)
+
+    def distribution_and_value(self, observations):
+        hidden = self.encoder(observations)
+        mean = self.actor_mean(hidden)
+        std = self.log_std.exp().expand_as(mean)
+        return Normal(mean, std), self.critic(hidden).squeeze(-1)
+
+    @torch.no_grad()
+    def sample_action(self, observations):
+        distribution, value = self.distribution_and_value(observations)
+        raw_action = distribution.sample()
+        env_action = torch.tanh(raw_action)
+        log_prob = distribution.log_prob(raw_action).sum(dim=-1)
+        return env_action, raw_action, log_prob, value
+
+    @torch.no_grad()
+    def deterministic_action(self, observations):
+        distribution, value = self.distribution_and_value(observations)
+        return torch.tanh(distribution.mean), value
+
+    def evaluate_raw_action(self, observations, raw_actions):
+        distribution, value = self.distribution_and_value(observations)
+        log_prob = distribution.log_prob(raw_actions).sum(dim=-1)
+        entropy = distribution.entropy().sum(dim=-1)
+        return log_prob, entropy, value
+
+
+class RolloutBuffer:
+    def __init__(self, steps, agents, observation_dim, action_dim, device):
+        self.observations = torch.zeros((steps, agents, observation_dim), dtype=torch.float32, device=device)
+        self.raw_actions = torch.zeros((steps, agents, action_dim), dtype=torch.float32, device=device)
+        self.log_probs = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+        self.rewards = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+        self.dones = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+        self.values = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+        self.advantages = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+        self.returns = torch.zeros((steps, agents), dtype=torch.float32, device=device)
+
+    def compute_gae(self, last_value, gamma, gae_lambda):
+        last_advantage = torch.zeros_like(last_value)
+        for step in reversed(range(self.rewards.shape[0])):
+            if step == self.rewards.shape[0] - 1:
+                next_value = last_value
+            else:
+                next_value = self.values[step + 1]
+            next_nonterminal = 1.0 - self.dones[step]
+            delta = self.rewards[step] + gamma * next_value * next_nonterminal - self.values[step]
+            last_advantage = delta + gamma * gae_lambda * next_nonterminal * last_advantage
+            self.advantages[step] = last_advantage
+        self.returns.copy_(self.advantages + self.values)
+
+    def flatten(self):
+        return {
+            "observations": self.observations.flatten(0, 1),
+            "raw_actions": self.raw_actions.flatten(0, 1),
+            "log_probs": self.log_probs.flatten(),
+            "values": self.values.flatten(),
+            "advantages": self.advantages.flatten(),
+            "returns": self.returns.flatten(),
+        }
+
+
+def resolve_device(requested):
+    if requested == "auto":
+        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    return torch.device(requested)
+
+
+def make_env(config):
+    from pufferlib.ocean.drive.drive import Drive
+
+    map_dir = Path(config.map_dir)
+    if not map_dir.exists():
+        raise FileNotFoundError(f"Map directory does not exist: {map_dir}")
+
+    num_agents = config.num_envs * config.controlled_agents_per_env
+    return Drive(
+        map_dir=str(map_dir),
+        num_maps=config.num_maps,
+        num_agents=num_agents,
+        control_mode="control_mixed_play",
+        init_mode="create_all_valid",
+        goal_behavior=config.goal_behavior,
+        goal_target_distance=config.goal_target_distance,
+        action_type="continuous",
+        episode_length=config.episode_length,
+        termination_mode=0,
+        resample_frequency=config.resample_frequency,
+        render_mode=1,
+        max_controlled_agents=config.controlled_agents_per_env,
+    )
+
+
+def save_checkpoint(path, model, optimizer, config, global_step, update):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    torch.save(
+        {
+            "model": model.state_dict(),
+            "optimizer": optimizer.state_dict(),
+            "config": asdict(config),
+            "global_step": global_step,
+            "update": update,
+        },
+        path,
+    )
+
+
+def ppo_update(model, optimizer, buffer, config):
+    batch = buffer.flatten()
+    advantages = batch["advantages"]
+    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
+    batch_size = advantages.shape[0]
+    indices = torch.arange(batch_size, device=advantages.device)
+
+    metrics = {
+        "policy_loss": [],
+        "value_loss": [],
+        "entropy": [],
+        "approx_kl": [],
+        "clip_fraction": [],
+    }
+
+    for _ in range(config.update_epochs):
+        permutation = indices[torch.randperm(batch_size, device=indices.device)]
+        for start in range(0, batch_size, config.minibatch_size):
+            mb = permutation[start : start + config.minibatch_size]
+            new_log_prob, entropy, new_value = model.evaluate_raw_action(
+                batch["observations"][mb],
+                batch["raw_actions"][mb],
+            )
+
+            log_ratio = new_log_prob - batch["log_probs"][mb]
+            ratio = log_ratio.exp()
+            mb_advantages = advantages[mb]
+            policy_loss = -torch.min(
+                ratio * mb_advantages,
+                torch.clamp(ratio, 1 - config.clip_coef, 1 + config.clip_coef) * mb_advantages,
+            ).mean()
+
+            value_loss = 0.5 * (new_value - batch["returns"][mb]).pow(2).mean()
+            entropy_loss = entropy.mean()
+            loss = policy_loss + config.value_coef * value_loss - config.entropy_coef * entropy_loss
+
+            optimizer.zero_grad(set_to_none=True)
+            loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
+            optimizer.step()
+
+            with torch.no_grad():
+                metrics["policy_loss"].append(policy_loss.item())
+                metrics["value_loss"].append(value_loss.item())
+                metrics["entropy"].append(entropy_loss.item())
+                metrics["approx_kl"].append(((ratio - 1) - log_ratio).mean().item())
+                metrics["clip_fraction"].append(((ratio - 1).abs() > config.clip_coef).float().mean().item())
+
+    return {name: float(np.mean(values)) for name, values in metrics.items()}
+
+
+@torch.no_grad()
+def evaluate(model, env, observation, device, steps):
+    rewards = []
+    for _ in range(steps):
+        obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device)
+        actions, _ = model.deterministic_action(obs_tensor)
+        observation, reward, terminal, truncation, info = env.step(actions.cpu().numpy().astype(np.float32))
+        rewards.append(float(np.mean(reward)))
+    return observation, float(np.mean(rewards)), float(np.sum(rewards))
+
+
+def train(config):
+    random.seed(config.seed)
+    np.random.seed(config.seed)
+    torch.manual_seed(config.seed)
+    device = resolve_device(config.device)
+
+    print("Creating PufferDrive environment...")
+    env = make_env(config)
+    observation, _ = env.reset(seed=config.seed)
+    num_agents, observation_dim = observation.shape
+    action_dim = env.single_action_space.shape[0]
+
+    print(f"device={device}")
+    print(f"agents={num_agents}, envs={env.num_envs}, obs_dim={observation_dim}, action_dim={action_dim}")
+    print(f"map_ids={env.map_ids}")
+    print(f"scenario_ids={env.scenario_ids}")
+
+    model = ActorCritic(observation_dim, action_dim, config.hidden_size).to(device)
+    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, eps=1e-5)
+    global_step = 0
+    first_update = 1
+
+    if config.resume:
+        checkpoint = torch.load(config.resume, map_location=device, weights_only=False)
+        model.load_state_dict(checkpoint["model"])
+        optimizer.load_state_dict(checkpoint["optimizer"])
+        global_step = int(checkpoint.get("global_step", 0))
+        first_update = int(checkpoint.get("update", 0)) + 1
+        print(f"Resumed from {config.resume} at step {global_step}")
+
+    steps_per_update = config.rollout_steps * num_agents
+    total_updates = max(1, (config.total_timesteps - global_step + steps_per_update - 1) // steps_per_update)
+    recent_episode_returns = deque(maxlen=100)
+    recent_episode_lengths = deque(maxlen=100)
+    running_returns = np.zeros(num_agents, dtype=np.float32)
+    running_lengths = np.zeros(num_agents, dtype=np.int32)
+    started_at = time.time()
+
+    try:
+        for update_offset in range(total_updates):
+            update = first_update + update_offset
+            buffer = RolloutBuffer(
+                config.rollout_steps,
+                num_agents,
+                observation_dim,
+                action_dim,
+                device,
+            )
+
+            for step in range(config.rollout_steps):
+                obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device)
+                env_action, raw_action, log_prob, value = model.sample_action(obs_tensor)
+                next_observation, reward, terminal, truncation, info = env.step(
+                    env_action.cpu().numpy().astype(np.float32)
+                )
+                done = np.logical_or(terminal, truncation)
+
+                buffer.observations[step].copy_(obs_tensor)
+                buffer.raw_actions[step].copy_(raw_action)
+                buffer.log_probs[step].copy_(log_prob)
+                buffer.values[step].copy_(value)
+                buffer.rewards[step].copy_(torch.as_tensor(reward, dtype=torch.float32, device=device))
+                buffer.dones[step].copy_(torch.as_tensor(done, dtype=torch.float32, device=device))
+
+                running_returns += reward
+                running_lengths += 1
+                for agent_index in np.flatnonzero(done):
+                    recent_episode_returns.append(float(running_returns[agent_index]))
+                    recent_episode_lengths.append(int(running_lengths[agent_index]))
+                    running_returns[agent_index] = 0
+                    running_lengths[agent_index] = 0
+
+                observation = next_observation
+                global_step += num_agents
+
+            with torch.no_grad():
+                last_obs = torch.as_tensor(observation, dtype=torch.float32, device=device)
+                _, last_value = model.distribution_and_value(last_obs)
+            buffer.compute_gae(last_value, config.gamma, config.gae_lambda)
+            metrics = ppo_update(model, optimizer, buffer, config)
+
+            elapsed = max(time.time() - started_at, 1e-6)
+            sps = int(global_step / elapsed)
+            mean_reward = float(buffer.rewards.mean().item())
+            episode_return = float(np.mean(recent_episode_returns)) if recent_episode_returns else float("nan")
+            episode_length = float(np.mean(recent_episode_lengths)) if recent_episode_lengths else float("nan")
+            print(
+                f"update={update:04d} step={global_step:09d} sps={sps:6d} "
+                f"reward/step={mean_reward:+.4f} episode_return={episode_return:+.3f} "
+                f"episode_len={episode_length:.1f} policy_loss={metrics['policy_loss']:+.4f} "
+                f"value_loss={metrics['value_loss']:.4f} entropy={metrics['entropy']:.3f} "
+                f"kl={metrics['approx_kl']:.5f} clipfrac={metrics['clip_fraction']:.3f}"
+            )
+
+            if update % config.checkpoint_interval == 0 or global_step >= config.total_timesteps:
+                checkpoint_path = Path(config.checkpoint_dir) / f"ppo_step_{global_step}.pt"
+                save_checkpoint(checkpoint_path, model, optimizer, config, global_step, update)
+                print(f"saved checkpoint: {checkpoint_path}")
+
+            if global_step >= config.total_timesteps:
+                break
+
+        observation, eval_mean_reward, eval_return = evaluate(
+            model,
+            env,
+            observation,
+            device,
+            config.eval_steps,
+        )
+        print(f"deterministic_eval mean_reward={eval_mean_reward:+.4f} return={eval_return:+.3f}")
+
+        final_path = Path(config.checkpoint_dir) / "ppo_final.pt"
+        save_checkpoint(final_path, model, optimizer, config, global_step, update)
+        print(f"saved final checkpoint: {final_path}")
+    finally:
+        env.close()
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Minimal PPO trainer based on parallel_data_collect.py")
+    parser.add_argument("--map-dir", default="resources/drive/binaries")
+    parser.add_argument("--num-maps", type=int, default=1)
+    parser.add_argument("--num-envs", type=int, default=4)
+    parser.add_argument("--controlled-agents-per-env", type=int, default=1)
+    parser.add_argument("--episode-length", type=int, default=91)
+    parser.add_argument("--resample-frequency", type=int, default=910)
+    parser.add_argument("--goal-behavior", type=int, choices=[0, 1, 2], default=1)
+    parser.add_argument("--goal-target-distance", type=float, default=30.0)
+    parser.add_argument("--total-timesteps", type=int, default=100_000)
+    parser.add_argument("--rollout-steps", type=int, default=128)
+    parser.add_argument("--update-epochs", type=int, default=4)
+    parser.add_argument("--minibatch-size", type=int, default=512)
+    parser.add_argument("--learning-rate", type=float, default=3e-4)
+    parser.add_argument("--gamma", type=float, default=0.98)
+    parser.add_argument("--gae-lambda", type=float, default=0.95)
+    parser.add_argument("--clip-coef", type=float, default=0.2)
+    parser.add_argument("--value-coef", type=float, default=0.5)
+    parser.add_argument("--entropy-coef", type=float, default=0.005)
+    parser.add_argument("--max-grad-norm", type=float, default=1.0)
+    parser.add_argument("--hidden-size", type=int, default=256)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--device", choices=["auto", "cpu", "cuda"], default="auto")
+    parser.add_argument("--checkpoint-dir", default="checkpoints/minimal_ppo")
+    parser.add_argument("--checkpoint-interval", type=int, default=10)
+    parser.add_argument("--eval-steps", type=int, default=91)
+    parser.add_argument("--resume")
+    args = parser.parse_args()
+    return TrainConfig(**vars(args))
+
+
+if __name__ == "__main__":
+    config = parse_args()
+    print(json.dumps(asdict(config), indent=2))
+    train(config)
diff --git a/scripts/parallel_data_collect.py b/scripts/parallel_data_collect.py
new file mode 100644
index 0000000000..f0097954a7
--- /dev/null
+++ b/scripts/parallel_data_collect.py
@@ -0,0 +1,118 @@
+import os
+import sys
+from pathlib import Path
+import numpy as np
+
+# Run from the PufferDrive project root so relative resource/config paths work.
+working_dir = Path.cwd()
+while not (working_dir / "pufferlib").exists():
+    if working_dir == working_dir.parent:
+        raise FileNotFoundError("Could not find the PufferDrive project root containing 'pufferlib'")
+    working_dir = working_dir.parent
+os.chdir(working_dir)
+sys.path.append(str(working_dir / "pufferlib" / "ocean" / "drive"))
+
+from pufferlib.ocean.drive.drive import Drive, RenderView
+
+def main():
+    MAP_DIR = "resources/drive/binaries/training"
+
+    if not Path(MAP_DIR).exists():
+        print(f"Warning: {MAP_DIR} not found.")
+        return
+
+    # PufferDrive samples vectorized envs from the first NUM_MAPS map files.
+    # num_agents is the target total number of controlled rows in the batch.
+    NUM_ENVS = 10
+    NUM_MAPS = 100
+    CONTROLLED_AGENTS_PER_ENV = 1
+    NUM_AGENTS = NUM_ENVS * CONTROLLED_AGENTS_PER_ENV
+    EPISODE_LEN = 91
+
+    print("Initializing PufferDrive Environment...")
+    env = Drive(
+        map_dir=MAP_DIR,
+        num_maps=NUM_MAPS,
+        num_agents=NUM_AGENTS,
+        # max_controlled_agents is enforced for control_mixed_play in the C env.
+        control_mode="control_mixed_play",
+        init_mode="create_all_valid",
+        goal_behavior=2,
+        action_type="continuous",
+        episode_length=EPISODE_LEN,
+        render_mode=RenderView.FULL_SIM_STATE,
+        max_controlled_agents=CONTROLLED_AGENTS_PER_ENV,
+    )
+
+    obs, _ = env.reset()
+    print("env obs space:", env.observation_space)
+    print("env action space:", env.action_space)
+    print("single env obs shape:", env.single_observation_space)
+    print("single env action shape:", env.single_action_space)
+
+    num_controlled_agents = obs.shape[0]
+    if env.num_envs != NUM_ENVS or num_controlled_agents != NUM_AGENTS:
+        env.close()
+        raise RuntimeError(
+            f"Expected {NUM_ENVS} envs/{NUM_AGENTS} controlled agents, "
+            f"got {env.num_envs} envs/{num_controlled_agents} controlled agents."
+        )
+
+    print(f"Environment initialized. Batch size (controlled agents): {num_controlled_agents}")
+    print(f"Vectorized env count: {env.num_envs}")
+    print(f"Sampled map ids: {env.map_ids}")
+    print(f"Scenario ids: {env.scenario_ids}")
+
+    # 2. Initialize lists to store our data
+    collected_obs = []
+    collected_actions = []
+    collected_rewards = []
+    collected_terminals = []
+    collected_truncations = []
+
+    print("Starting Parallel Native Render Rollout...")
+    for t in range(EPISODE_LEN):
+        # 3. Store the observation.
+        # CRITICAL: Use .copy() because PufferLib writes to the same C-memory buffer in-place.
+        collected_obs.append(obs.copy())
+
+        # Generate actions for all parallel agents simultaneously
+        rl_actions = np.random.uniform(-1.0, 1.0, size=(num_controlled_agents, 2)).astype(np.float32)
+
+        # Store the actions
+        collected_actions.append(rl_actions.copy())
+
+        # Step the environment
+        obs, rewards, terminals, truncations, infos = env.step(rl_actions)
+        collected_rewards.append(rewards.copy())
+        collected_terminals.append(terminals.copy())
+        collected_truncations.append(truncations.copy())
+
+        # Render only the first environment (env_id=0) out of the 10
+        # env.render(view_mode=RenderView.FULL_SIM_STATE, draw_traces=True, env_id=0)
+
+        if t % 10 == 0:
+            print(f"Processed step {t:02d}/{EPISODE_LEN}...")
+
+        if terminals.all() or truncations.all():
+            print(f"All environments terminated at step {t}.")
+            break
+
+    env.close()
+
+    # 4. Stack the lists into final numpy arrays for easy saving/manipulation
+    final_obs_array = np.stack(collected_obs)          # Shape: (T, 10, obs_dim)
+    final_actions_array = np.stack(collected_actions)  # Shape: (T, 10, 2)
+    final_rewards_array = np.stack(collected_rewards)  # Shape: (T, 10)
+    final_terminals_array = np.stack(collected_terminals)
+    final_truncations_array = np.stack(collected_truncations)
+
+    print("\n=== Data Collection Complete ===")
+    print(f"Collected Obs Shape: {final_obs_array.shape} -> (Time, Agents, Features)")
+    print(f"Collected Actions Shape: {final_actions_array.shape} -> (Time, Agents, Actions)")
+    print(f"Collected Rewards Shape: {final_rewards_array.shape} -> (Time, Agents)")
+    print(f"Collected Terminals Shape: {final_terminals_array.shape} -> (Time, Agents)")
+    print(f"Collected Truncations Shape: {final_truncations_array.shape} -> (Time, Agents)")
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/prepare_waymo_maps.py b/scripts/prepare_waymo_maps.py
new file mode 100644
index 0000000000..5e6cdd6e51
--- /dev/null
+++ b/scripts/prepare_waymo_maps.py
@@ -0,0 +1,78 @@
+"""Convert exported Waymo Motion JSON scenarios into PufferDrive binaries."""
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+os.chdir(REPO_ROOT)
+sys.path.insert(0, str(REPO_ROOT))
+
+from pufferlib.ocean.drive.drive import load_map
+
+
+def prepare_scenario(source_path, prepared_path):
+    with source_path.open("r", encoding="utf-8") as file:
+        data = json.load(file)
+
+    sdc_track_index = data.get("metadata", {}).get("sdc_track_index")
+    for index, obj in enumerate(data.get("objects", [])):
+        obj["mark_as_expert"] = index != sdc_track_index
+
+    prepared_path.parent.mkdir(parents=True, exist_ok=True)
+    prepared_path.write_text(json.dumps(data), encoding="utf-8")
+    return str(data.get("scenario_id") or source_path.stem)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("json_files", nargs="+", type=Path)
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("resources/drive/binaries/training"),
+    )
+    parser.add_argument(
+        "--prepared-json-dir",
+        type=Path,
+        default=Path("resources/drive/waymo_training_json"),
+    )
+    args = parser.parse_args()
+
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+    args.prepared_json_dir.mkdir(parents=True, exist_ok=True)
+
+    manifest = []
+    for map_index, source_path in enumerate(args.json_files):
+        if not source_path.is_file():
+            raise FileNotFoundError(source_path)
+
+        prepared_path = args.prepared_json_dir / f"map_{map_index:03d}.json"
+        binary_path = args.output_dir / f"map_{map_index:03d}.bin"
+        scenario_id = prepare_scenario(source_path, prepared_path)
+        load_map(
+            str(prepared_path),
+            unique_map_id=map_index,
+            binary_output=str(binary_path),
+        )
+        manifest.append(
+            {
+                "map_index": map_index,
+                "scenario_id": scenario_id,
+                "source": str(source_path),
+                "binary": str(binary_path),
+            }
+        )
+        print(f"[{map_index + 1}/{len(args.json_files)}] {scenario_id} -> {binary_path}")
+
+    manifest_path = args.output_dir / "manifest.json"
+    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+    print(f"Prepared {len(manifest)} maps")
+    print(f"Manifest: {manifest_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/prepare_waymo_maps_wsl.sh b/scripts/prepare_waymo_maps_wsl.sh
new file mode 100644
index 0000000000..b6d5403ab9
--- /dev/null
+++ b/scripts/prepare_waymo_maps_wsl.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)"
+NATIVE_REPO="${HOME}/PufferDrive-native"
+VENV_DIR="${HOME}/.venvs/pufferdrive-wsl"
+
+if [[ $# -eq 0 ]]; then
+  echo "Usage: bash scripts/prepare_waymo_maps_wsl.sh SCENARIO.json [SCENARIO.json ...]"
+  exit 2
+fi
+
+if [[ ! -x "$VENV_DIR/bin/python" || ! -d "$NATIVE_REPO/pufferlib" ]]; then
+  echo "Native WSL environment is not ready."
+  echo "Run bash scripts/wsl_native_3d_setup.sh first."
+  exit 1
+fi
+
+json_files=()
+for path in "$@"; do
+  if [[ ! -f "$path" ]]; then
+    echo "Scenario JSON not found: $path"
+    exit 1
+  fi
+  json_files+=("$(realpath "$path")")
+done
+
+cp -f "$WINDOWS_REPO/scripts/prepare_waymo_maps.py" "$NATIVE_REPO/scripts/prepare_waymo_maps.py"
+
+cd "$NATIVE_REPO"
+source "$VENV_DIR/bin/activate"
+export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
+
+python scripts/prepare_waymo_maps.py "${json_files[@]}"
+
+echo "Prepared maps in $NATIVE_REPO/resources/drive/binaries/training"
diff --git a/scripts/run_minimal_ppo_wsl.sh b/scripts/run_minimal_ppo_wsl.sh
new file mode 100644
index 0000000000..646b253dca
--- /dev/null
+++ b/scripts/run_minimal_ppo_wsl.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)"
+NATIVE_REPO="${HOME}/PufferDrive-native"
+VENV_DIR="${HOME}/.venvs/pufferdrive-wsl"
+
+if [[ ! -d "$NATIVE_REPO/pufferlib" ]]; then
+  echo "Native repository not found: $NATIVE_REPO"
+  echo "Run bash scripts/wsl_native_3d_setup.sh first."
+  exit 1
+fi
+
+if [[ ! -x "$VENV_DIR/bin/python" ]]; then
+  echo "WSL Python environment not found: $VENV_DIR"
+  echo "Run bash scripts/wsl_native_3d_setup.sh first."
+  exit 1
+fi
+
+cp -f "$WINDOWS_REPO/scripts/minimal_ppo_train.py" "$NATIVE_REPO/scripts/minimal_ppo_train.py"
+
+cd "$NATIVE_REPO"
+source "$VENV_DIR/bin/activate"
+export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
+
+python scripts/minimal_ppo_train.py "$@"
diff --git a/scripts/visualize_minimal_ppo.py b/scripts/visualize_minimal_ppo.py
new file mode 100644
index 0000000000..e16e8d5662
--- /dev/null
+++ b/scripts/visualize_minimal_ppo.py
@@ -0,0 +1,244 @@
+"""Render a trained minimal PPO checkpoint with PufferDrive's native 3D view."""
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+os.chdir(REPO_ROOT)
+sys.path.insert(0, str(REPO_ROOT))
+
+from pufferlib.ocean.drive.drive import Drive, RenderView
+from scripts.minimal_ppo_train import ActorCritic
+
+
+def start_xvfb_if_needed():
+    if os.environ.get("DISPLAY"):
+        return None
+
+    for display_num in range(99, 120):
+        display = f":{display_num}"
+        lock_path = Path(f"/tmp/.X{display_num}-lock")
+        socket_path = Path(f"/tmp/.X11-unix/X{display_num}")
+        if lock_path.exists() or socket_path.exists():
+            continue
+
+        error_log = tempfile.NamedTemporaryFile(prefix="pufferdrive_eval_xvfb_", suffix=".log", delete=False)
+        error_log_path = Path(error_log.name)
+        proc = subprocess.Popen(
+            [
+                "Xvfb",
+                display,
+                "-screen",
+                "0",
+                "1280x720x24",
+                "+extension",
+                "GLX",
+                "-ac",
+                "-noreset",
+            ],
+            stdout=subprocess.DEVNULL,
+            stderr=error_log,
+        )
+        error_log.close()
+        os.environ["DISPLAY"] = display
+
+        for _ in range(40):
+            if proc.poll() is not None:
+                break
+            if lock_path.exists() or socket_path.exists():
+                time.sleep(0.5)
+                error_log_path.unlink(missing_ok=True)
+                return proc
+            time.sleep(0.1)
+
+        proc.terminate()
+        proc.wait(timeout=2)
+        os.environ.pop("DISPLAY", None)
+        detail = error_log_path.read_text(errors="replace").strip()
+        error_log_path.unlink(missing_ok=True)
+        if detail:
+            print(detail)
+
+    raise RuntimeError("Could not start Xvfb")
+
+
+def stop_xvfb(proc):
+    if proc is None:
+        return
+    proc.terminate()
+    try:
+        proc.wait(timeout=2)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        proc.wait(timeout=2)
+    os.environ.pop("DISPLAY", None)
+
+
+def probe_video(path):
+    command = [
+        "ffprobe",
+        "-v",
+        "error",
+        "-count_frames",
+        "-select_streams",
+        "v:0",
+        "-show_entries",
+        "stream=nb_read_frames,duration,r_frame_rate",
+        "-of",
+        "json",
+        str(path),
+    ]
+    result = subprocess.run(command, check=True, capture_output=True, text=True)
+    stream = json.loads(result.stdout)["streams"][0]
+    return {
+        "video_frames": int(stream.get("nb_read_frames", 0)),
+        "video_duration_seconds": float(stream.get("duration", 0.0)),
+        "video_frame_rate": stream.get("r_frame_rate"),
+    }
+
+
+def render_checkpoint(args):
+    device = torch.device(args.device)
+    checkpoint = torch.load(args.checkpoint, map_location=device, weights_only=False)
+    train_config = checkpoint.get("config", {})
+
+    goal_behavior = args.goal_behavior
+    if goal_behavior is None:
+        goal_behavior = int(train_config.get("goal_behavior", 1))
+
+    goal_target_distance = args.goal_target_distance
+    if goal_target_distance is None:
+        goal_target_distance = float(train_config.get("goal_target_distance", 30.0))
+
+    env = Drive(
+        map_dir=args.map_dir,
+        num_maps=args.num_maps,
+        num_agents=args.num_envs * args.controlled_agents_per_env,
+        control_mode="control_mixed_play",
+        init_mode="create_all_valid",
+        goal_behavior=goal_behavior,
+        goal_target_distance=goal_target_distance,
+        action_type="continuous",
+        episode_length=args.episode_length,
+        termination_mode=0,
+        resample_frequency=0,
+        render_mode=1,
+        human_agent_idx=args.agent_index,
+        max_controlled_agents=args.controlled_agents_per_env,
+    )
+
+    observation, _ = env.reset(seed=args.seed)
+    observation_dim = observation.shape[1]
+    action_dim = env.single_action_space.shape[0]
+    hidden_size = int(train_config.get("hidden_size", 256))
+    model = ActorCritic(observation_dim, action_dim, hidden_size).to(device)
+    model.load_state_dict(checkpoint["model"])
+    model.eval()
+
+    scenario_id = env.scenario_ids[args.env_id]
+    native_output = Path(f"{scenario_id}.mp4")
+    native_output.unlink(missing_ok=True)
+    returns = np.zeros(observation.shape[0], dtype=np.float32)
+    reward_history = []
+    action_history = []
+    xvfb_proc = start_xvfb_if_needed()
+
+    try:
+        env.render(
+            view_mode=RenderView.AGENT_PERSP,
+            draw_traces=args.draw_traces,
+            env_id=args.env_id,
+        )
+        for step in range(args.episode_length):
+            obs_tensor = torch.as_tensor(observation, dtype=torch.float32, device=device)
+            with torch.no_grad():
+                actions, values = model.deterministic_action(obs_tensor)
+            actions_np = actions.cpu().numpy().astype(np.float32)
+            observation, rewards, terminals, truncations, infos = env.step(actions_np)
+            env.render(
+                view_mode=RenderView.AGENT_PERSP,
+                draw_traces=args.draw_traces,
+                env_id=args.env_id,
+            )
+            returns += rewards
+            reward_history.append(float(np.mean(rewards)))
+            action_history.append(actions_np[args.agent_index].tolist())
+            if step % 10 == 0:
+                print(
+                    f"step={step:02d}/{args.episode_length} "
+                    f"mean_reward={np.mean(rewards):+.4f} "
+                    f"ego_action={actions_np[args.agent_index]}"
+                )
+    finally:
+        env.close()
+        stop_xvfb(xvfb_proc)
+
+    if not native_output.exists():
+        raise FileNotFoundError(f"Native renderer did not produce {native_output}")
+
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+    video_path = args.output_dir / f"{scenario_id}_ppo_step_{checkpoint.get('global_step', 0)}.mp4"
+    shutil.move(native_output, video_path)
+    video_probe = probe_video(video_path)
+    if video_probe["video_frames"] < args.episode_length:
+        raise RuntimeError(
+            f"Native renderer produced only {video_probe['video_frames']} frames; "
+            f"expected at least {args.episode_length}"
+        )
+
+    metrics = {
+        "checkpoint": str(args.checkpoint),
+        "checkpoint_global_step": int(checkpoint.get("global_step", 0)),
+        "scenario_id": scenario_id,
+        "episode_length": args.episode_length,
+        "goal_behavior": goal_behavior,
+        "goal_target_distance": goal_target_distance,
+        "mean_reward_per_step": float(np.mean(reward_history)),
+        "mean_agent_return": float(np.mean(returns)),
+        "ego_return": float(returns[args.agent_index]),
+        "mean_absolute_ego_action": np.mean(np.abs(action_history), axis=0).tolist(),
+        "video": str(video_path),
+        **video_probe,
+    }
+    metrics_path = video_path.with_suffix(".json")
+    metrics_path.write_text(json.dumps(metrics, indent=2), encoding="utf-8")
+    print(json.dumps(metrics, indent=2))
+    return video_path, metrics_path
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--checkpoint",
+        type=Path,
+        default=Path("checkpoints/minimal_ppo/ppo_final.pt"),
+    )
+    parser.add_argument("--map-dir", default="resources/drive/binaries/waymo_native")
+    parser.add_argument("--num-maps", type=int, default=1)
+    parser.add_argument("--num-envs", type=int, default=1)
+    parser.add_argument("--controlled-agents-per-env", type=int, default=1)
+    parser.add_argument("--episode-length", type=int, default=91)
+    parser.add_argument("--goal-behavior", type=int, choices=[0, 1, 2])
+    parser.add_argument("--goal-target-distance", type=float)
+    parser.add_argument("--env-id", type=int, default=0)
+    parser.add_argument("--agent-index", type=int, default=0)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu")
+    parser.add_argument("--draw-traces", action="store_true")
+    parser.add_argument("--output-dir", type=Path, default=Path("training_visualizations"))
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    render_checkpoint(parse_args())
diff --git a/scripts/visualize_minimal_ppo_wsl.sh b/scripts/visualize_minimal_ppo_wsl.sh
new file mode 100644
index 0000000000..03d15936b8
--- /dev/null
+++ b/scripts/visualize_minimal_ppo_wsl.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+WINDOWS_REPO="$(cd "$(dirname "$0")/.." && pwd)"
+NATIVE_REPO="${HOME}/PufferDrive-native"
+VENV_DIR="${HOME}/.venvs/pufferdrive-wsl"
+
+if [[ ! -f "$NATIVE_REPO/checkpoints/minimal_ppo/ppo_final.pt" ]]; then
+  echo "Checkpoint not found: $NATIVE_REPO/checkpoints/minimal_ppo/ppo_final.pt"
+  exit 1
+fi
+
+cp -f "$WINDOWS_REPO/scripts/minimal_ppo_train.py" "$NATIVE_REPO/scripts/minimal_ppo_train.py"
+cp -f "$WINDOWS_REPO/scripts/visualize_minimal_ppo.py" "$NATIVE_REPO/scripts/visualize_minimal_ppo.py"
+
+cd "$NATIVE_REPO"
+source "$VENV_DIR/bin/activate"
+export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
+
+if ! cmp -s "$WINDOWS_REPO/pufferlib/ocean/drive/drive.h" "$NATIVE_REPO/pufferlib/ocean/drive/drive.h"; then
+  echo "Native renderer source changed; rebuilding Ocean binding..."
+  cp -f "$WINDOWS_REPO/pufferlib/ocean/drive/drive.h" "$NATIVE_REPO/pufferlib/ocean/drive/drive.h"
+  NO_TRAIN=1 python setup.py build_ext --inplace --force
+fi
+
+python scripts/visualize_minimal_ppo.py "$@"
+
+mkdir -p "$WINDOWS_REPO/training_visualizations"
+cp -f training_visualizations/*.mp4 "$WINDOWS_REPO/training_visualizations/"
+cp -f training_visualizations/*.json "$WINDOWS_REPO/training_visualizations/"
+
+echo "Copied outputs to $WINDOWS_REPO/training_visualizations"
diff --git a/scripts/wsl_native_3d_setup.sh b/scripts/wsl_native_3d_setup.sh
new file mode 100644
index 0000000000..bac5f1aa3a
--- /dev/null
+++ b/scripts/wsl_native_3d_setup.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+ORIGINAL_REPO_DIR="${PUFFERDRIVE_NATIVE_ORIGIN:-}"
+
+echo "==> Installing Linux build and render dependencies"
+sudo apt update
+sudo apt install -y \
+  build-essential \
+  clang \
+  curl \
+  ffmpeg \
+  libgl1 \
+  libglib2.0-0 \
+  libx11-dev \
+  libxcursor-dev \
+  libxi-dev \
+  libxinerama-dev \
+  libxrandr-dev \
+  libxxf86vm-dev \
+  mesa-utils \
+  python3-dev \
+  python3-pip \
+  python3-venv \
+  rsync \
+  xvfb
+
+if [[ "$PWD" == /mnt/* && -z "$ORIGINAL_REPO_DIR" ]]; then
+  NATIVE_REPO_DIR="${HOME}/PufferDrive-native"
+  echo "==> Repository is on a Windows mount; mirroring to ${NATIVE_REPO_DIR} for native Linux build"
+  mkdir -p "$NATIVE_REPO_DIR"
+  rsync -a --delete \
+    --exclude ".git/" \
+    --exclude ".venv/" \
+    --exclude ".venv-wsl/" \
+    --exclude ".venv-wsl-build/" \
+    --exclude ".venv-wsl-native/" \
+    --exclude "build/" \
+    --exclude "dist/" \
+    --exclude "*.egg-info/" \
+    "$PWD/" "$NATIVE_REPO_DIR/"
+  echo "==> Restarting setup from Linux filesystem"
+  PUFFERDRIVE_NATIVE_ORIGIN="$PWD" bash "$NATIVE_REPO_DIR/scripts/wsl_native_3d_setup.sh" "$@"
+  exit $?
+fi
+
+if [[ ! -d resources ]]; then
+  echo "==> Creating Linux resources symlink"
+  rm -f resources
+  ln -s pufferlib/resources resources
+fi
+
+echo "==> Creating WSL Python environment"
+VENV_DIR="${HOME}/.venvs/pufferdrive-wsl"
+rm -rf "$VENV_DIR"
+mkdir -p "$(dirname "$VENV_DIR")"
+python3 -m venv --copies "$VENV_DIR"
+source "$VENV_DIR/bin/activate"
+python -m pip install --upgrade pip setuptools wheel
+
+echo "==> Installing Python dependencies"
+python -m pip install \
+  "numpy<2" \
+  Cython \
+  "gym==0.23" \
+  "gymnasium==0.29.1" \
+  "pettingzoo==1.24.1" \
+  "shimmy[gym-v21]" \
+  tqdm \
+  imageio \
+  imageio-ffmpeg \
+  "torch"
+
+echo "==> Building PufferDrive native Ocean binding"
+export PYTHONPATH="$PWD${PYTHONPATH:+:$PYTHONPATH}"
+NO_TRAIN=1 python setup.py build_ext --inplace --force
+
+echo "==> Done. Native build is ready in ${PWD}"
+echo "Next:"
+echo "  bash scripts/prepare_waymo_maps_wsl.sh /path/to/scenario.json"
+echo "  bash scripts/run_minimal_ppo_wsl.sh --map-dir resources/drive/binaries/training --num-maps 1"

From 111adb43d33aadd755662047325a59d724d8317b Mon Sep 17 00:00:00 2001
From: Haolin <nyx7ck@virginia.edu>
Date: Mon, 15 Jun 2026 00:02:47 -0400
Subject: [PATCH 2/2] Document workflow and add scenario visualization

---
 README.md                         | 289 ++++++++++++--------------
 scripts/render_waymo_follow_3d.py | 267 ++++++++++++++++++++++++
 scripts/visualize_waymo_json.py   | 331 ++++++++++++++++++++++++++++++
 3 files changed, 732 insertions(+), 155 deletions(-)
 create mode 100644 scripts/render_waymo_follow_3d.py
 create mode 100644 scripts/visualize_waymo_json.py

diff --git a/README.md b/README.md
index 00aae40fd7..53c2646072 100644
--- a/README.md
+++ b/README.md
@@ -1,224 +1,203 @@
-# PufferDrive
+# PufferDrive Minimal PPO on Windows and WSL
 
-[![Unit Tests](https://github.com/Emerge-Lab/PufferDrive/actions/workflows/utest.yml/badge.svg)](https://github.com/Emerge-Lab/PufferDrive/actions/workflows/utest.yml)
+This branch adds a small reinforcement learning workflow on top of the
+original [PufferDrive](https://github.com/Emerge-Lab/PufferDrive) project.
 
-<img align="left" style="width:260px" src="https://github.com/Emerge-Lab/PufferDrive/blob/main/pufferlib/resources/drive/pufferdrive_20fps_long.gif" width="288px">
+The goal is simple:
 
-**PufferDrive is a fast and friendly driving simulator to train and test RL-based models.**
+1. Export Waymo Motion Dataset scenarios as JSON.
+2. Convert the JSON files to PufferDrive map binaries.
+3. Train a small continuous-action PPO policy.
+4. Render the trained policy as an MP4 video.
 
-<br>
-<br>
-<br>
-<br>
-<br>
-<br>
-<br>
-<br>
-<br>
-<br>
+This is a working training example, not a finished autonomous driving model.
+The default short run is mainly useful for checking that the complete pipeline
+works.
 
----
+## Why WSL is used
 
-**Docs**: https://emerge-lab.github.io/PufferDrive
+The native PufferDrive renderer uses Linux libraries and Raylib. On Windows,
+the easiest setup is:
 
----
+- Keep the Git repository on the Windows drive.
+- Run compilation, training, and native 3D rendering inside WSL.
+- Copy checkpoints and videos back to the Windows repository.
 
-### See our 2.0 release video
+The setup script creates a Linux copy at:
 
-<a href="https://www.youtube.com/watch?v=LfQ324R-cbE">
-  <img src="https://img.youtube.com/vi/LfQ324R-cbE/0.jpg" alt="PufferDrive 2.0" width="300">
-</a>
-
-## Installation
-
-Clone the repo
-```bash
-https://github.com/Emerge-Lab/PufferDrive.git
-```
-
-Make a venv (`uv venv`), activate the venv
-```
-source .venv/bin/activate
+```text
+~/PufferDrive-native
 ```
 
-Inside the venv, install the dependencies
-```
-uv pip install -e .
-```
-
-Compile the C code
-```
-python setup.py build_ext --inplace --force
-```
-Run this while your virtual environment is active so the extension is built against the right interpreter.
+Your original Windows files remain under:
 
-To test your setup, you can run
+```text
+/mnt/c/Users/<username>/Desktop/PufferDrive
 ```
-puffer train puffer_drive
-```
-See also the [puffer docs](https://puffer.ai/docs.html).
 
+## 1. Clone this branch
 
-## Quick start
+From PowerShell:
 
-Start a training run
-```
-puffer train puffer_drive
+```powershell
+git clone --branch codex/minimal-ppo-wsl https://github.com/HC-Seaple/PufferDrive.git
+cd PufferDrive
 ```
 
-### Minimal PPO on Windows/WSL
-
-For an inspectable continuous-action PPO implementation, Waymo JSON map
-preparation, and native third-person checkpoint rendering, see
-[Minimal PPO training](docs/src/minimal-ppo.md).
+If Ubuntu is not installed in WSL, open PowerShell as Administrator:
 
-## Dataset
+```powershell
+powershell -ExecutionPolicy Bypass -File scripts/install_wsl_admin.ps1
+```
 
-<details>
-<summary>Downloading and using data</summary>
+Restart Windows if requested, then open Ubuntu or run `wsl`.
 
-### Data preparation
+## 2. Build the native environment
 
-To train with PufferDrive, you need to convert JSON files to map binaries. Run the following command with the path to your data folder:
+From WSL:
 
 ```bash
-python pufferlib/ocean/drive/drive.py
+cd /mnt/c/Users/<username>/Desktop/PufferDrive
+bash scripts/wsl_native_3d_setup.sh
 ```
 
-### Downloading Waymo Data
-
-You can download the WOMD data from Hugging Face in two versions:
-
-- **Mini dataset**: [GPUDrive_mini](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) contains 1,000 training files and 300 test/validation files
-- **Medium dataset**: [10,000 files from the training dataset](https://huggingface.co/datasets/daphne-cornelisse/pufferdrive_train)
-- **Large dataset**: [GPUDrive](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) contains 100,000 unique scenes
+This installs the Linux dependencies, creates a Python environment, copies the
+repository to the Linux filesystem, and builds the native extension.
 
-**Note**: Replace 'GPUDrive_mini' with 'GPUDrive' in your download commands if you want to use the full dataset.
+The warning messages from the C compiler about ignored return values are not
+fatal. The setup is successful when it prints:
 
-### Additional Data Sources
-
-For more training data compatible with PufferDrive, see [ScenarioMax](https://github.com/valeoai/ScenarioMax). The GPUDrive data format is fully compatible with PufferDrive.
-</details>
-
-
-## Visualizer
-
-<details>
-<summary>Dependencies and usage</summary>
+```text
+Done. Native build is ready
+```
 
-## Local rendering
+## 3. Prepare Waymo scenarios
 
-To launch an interactive renderer, first build:
-```
-bash scripts/build_ocean.sh drive local
-```
+Put one or more exported scenario JSON files in the Windows repository. Then
+run from WSL:
 
-then launch:
 ```bash
-./drive
+cd /mnt/c/Users/<username>/Desktop/PufferDrive
+bash scripts/prepare_waymo_maps_wsl.sh \
+  ./scenario_a.json \
+  ./scenario_b.json
 ```
-this will run `demo()` with an existing model checkpoint.
-
-## Headless server setup
 
-Run the Raylib visualizer on a headless server and export as .mp4. This will rollout the pre-trained policy in the env.
+The binary maps are written to the Linux-native repository:
 
-### Install dependencies
-
-```bash
-sudo apt update
-sudo apt install ffmpeg xvfb
+```text
+~/PufferDrive-native/resources/drive/binaries/training
 ```
 
-For HPC (There are no root privileges), so install into the conda environment
-```bash
-conda install -c conda-forge xorg-x11-server-xvfb-cos6-x86_64
-conda install -c conda-forge ffmpeg
+The files must be contiguous:
+
+```text
+map_000.bin
+map_001.bin
+map_002.bin
 ```
 
-- `ffmpeg`: Video processing and conversion
-- `xvfb`: Virtual display for headless environments
+## 4. Run a small training test
 
-### Build and run
+From the Windows-mounted repository in WSL:
 
-1. Build the application:
 ```bash
-bash scripts/build_ocean.sh visualize local
+bash scripts/run_minimal_ppo_wsl.sh \
+  --map-dir resources/drive/binaries/training \
+  --num-maps 2 \
+  --num-envs 1 \
+  --total-timesteps 10000 \
+  --rollout-steps 128 \
+  --minibatch-size 128
 ```
 
-2. Run with virtual display:
-```bash
-xvfb-run -s "-screen 0 1280x720x24" ./visualize
-```
+Change `--num-maps` to match the number of prepared map files.
 
-The `-s` flag sets up a virtual screen at 1280x720 resolution with 24-bit color depth.
+Checkpoints are saved under:
 
----
+```text
+~/PufferDrive-native/checkpoints/minimal_ppo
+```
 
-> To force a rebuild, you can delete the cached compiled executable binary using `rm ./visualize`.
+The training is running correctly when the step count increases, the losses
+remain finite, and `.pt` checkpoint files are created.
 
----
+## 5. Render the trained policy
 
-</details>
+```bash
+bash scripts/visualize_minimal_ppo_wsl.sh \
+  --map-dir resources/drive/binaries/training \
+  --num-maps 2 \
+  --episode-length 91 \
+  --draw-traces
+```
 
+The script renders with the native Raylib 3D renderer and copies the MP4 and
+JSON metrics to the Windows folder:
 
-## Benchmarks
+```text
+training_visualizations/
+```
 
-### Distributional realism
+## Visualize a Waymo JSON without training
 
-We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See documentation [here](https://emerge-lab.github.io/PufferDrive/wosac/).
+The following commands use the Windows virtual environment.
 
-WOSAC evaluation with random policy:
-```bash
-puffer eval puffer_drive --eval.wosac-realism-eval True
-```
+Create a top-down replay:
 
-WOSAC evaluation with your checkpoint (must be .pt file):
-```bash
-puffer eval puffer_drive --eval.wosac-realism-eval True --load-model-path <your-trained-policy>.pt
+```powershell
+.\.venv\Scripts\python.exe scripts\visualize_waymo_json.py scenario.json
 ```
 
-### Human-compatibility
+Create a simple 3D chase-camera replay for a selected Waymo track:
 
-You may be interested in how compatible your agent is with human partners. For this purpose, we support an eval where your policy only controls the self-driving car (SDC). The rest of the agents in the scene are stepped using the logs. While it is not a perfect eval since the human partners here are static, it will still give you a sense of how closely aligned your agent's behavior is to how people drive. You can run it like this:
-```bash
-puffer eval puffer_drive --eval.human-replay-eval True --load-model-path <your-trained-policy>.pt
+```powershell
+.\.venv\Scripts\python.exe scripts\render_waymo_follow_3d.py scenario.json `
+  --track-index 90 `
+  --start-frame 0 `
+  --end-frame 60
 ```
 
-## Development
+The lightweight chase-camera renderer uses boxes and map lines. It is useful
+for quickly checking a recorded trajectory without compiling Raylib. The
+native checkpoint renderer uses the PufferDrive car models and full native
+rendering.
 
-<details><summary>Documentation and browser demo</summary>
+Outputs are written to:
 
-**Docs**
-
-A browsable documentation site now lives under `docs/` and is configured with mkbooks. To preview locally:
-```
-brew install mdbook
-mdbook serve --open docs
+```text
+visualizations/
 ```
-Open the served URL to see a local version of the docs.
 
-**Interactive demo**
+## Main files
 
-To edit the browser demo, follow these steps:
-- Download [emscripten](https://github.com/emscripten-core/emscripten)
-- emscripten install latest
-- Activate: `source emsdk/emsdk_env.sh`
-- Run `bash scripts/build_ocean.sh drive web`
-- This generates a number of `game*` files, move them to `assets/` to include them on the webpage
+| File | Purpose |
+| --- | --- |
+| `scripts/minimal_ppo_train.py` | Small PPO actor-critic training loop |
+| `scripts/parallel_data_collect.py` | Original rollout pattern used by the trainer |
+| `scripts/prepare_waymo_maps.py` | Converts Waymo JSON to PufferDrive maps |
+| `scripts/run_minimal_ppo_wsl.sh` | Starts training in the Linux-native copy |
+| `scripts/visualize_minimal_ppo.py` | Runs a checkpoint and records native 3D video |
+| `scripts/visualize_waymo_json.py` | Creates a top-down JSON replay |
+| `scripts/render_waymo_follow_3d.py` | Creates a lightweight 3D chase replay |
+| `docs/src/minimal-ppo.md` | More detail about PPO and command options |
 
-</details>
+## Current limitation
 
+A 10,000-step run only verifies the architecture. It is usually not enough to
+learn good driving. Early policies may reverse, steer poorly, or fail to reach
+the goal.
 
-## Citation
+For a useful model, use more scenarios and training steps, then evaluate on
+scenarios that were not used for training. Useful measurements include:
 
-If you use PufferDrive in your research, please cite:
-```bibtex
-@software{pufferdrive2025github,
-  author = {Daphne Cornelisse* and Spencer Cheng* and Pragnay Mandavilli and Julian Hunt and Kevin Joseph and Waël Doulazmi and Valentin Charraut and Aditya Gupta and Joseph Suarez and Eugene Vinitsky},
-  title = {{PufferDrive}: A Fast and Friendly Driving Simulator for Training and Evaluating {RL} Agents},
-  url = {https://github.com/Emerge-Lab/PufferDrive},
-  version = {2.0.0},
-  year = {2025},
-}
-```
+- goal completion rate
+- collision rate
+- off-road rate
+- reverse-motion frequency
+- average episode return
+
+## Upstream project
+
+PufferDrive is developed by Emerge Lab. The original documentation is
+available at <https://emerge-lab.github.io/PufferDrive>.
diff --git a/scripts/render_waymo_follow_3d.py b/scripts/render_waymo_follow_3d.py
new file mode 100644
index 0000000000..23460cdf04
--- /dev/null
+++ b/scripts/render_waymo_follow_3d.py
@@ -0,0 +1,267 @@
+import argparse
+import json
+import math
+from pathlib import Path
+
+import imageio.v2 as imageio
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+
+
+ROAD_COLORS = {
+    "road_edge": (222, 226, 229),
+    "road_line": (244, 198, 63),
+    "lane": (91, 98, 105),
+    "crosswalk": (201, 205, 209),
+    "speed_bump": (218, 135, 63),
+    "driveway": (112, 124, 105),
+}
+
+
+def normalize(vector):
+    length = np.linalg.norm(vector)
+    return vector / max(length, 1e-8)
+
+
+def feature_geometry(feature):
+    for feature_type in ROAD_COLORS:
+        if feature_type not in feature:
+            continue
+        data = feature[feature_type]
+        points = data.get("polyline") or data.get("polygon") or []
+        if len(points) >= 2:
+            return feature_type, points
+    return None, None
+
+
+class ChaseCamera:
+    def __init__(self, state, width, height):
+        heading = float(state.get("heading", 0.0))
+        forward_xy = np.array([math.cos(heading), math.sin(heading), 0.0])
+        target = np.array([state["center_x"], state["center_y"], 1.0])
+        self.position = target - forward_xy * 15.0 + np.array([0.0, 0.0, 8.0])
+        look_at = target + forward_xy * 9.0 + np.array([0.0, 0.0, 0.5])
+        self.forward = normalize(look_at - self.position)
+        self.right = normalize(np.cross(self.forward, np.array([0.0, 0.0, 1.0])))
+        self.up = normalize(np.cross(self.right, self.forward))
+        self.width = width
+        self.height = height
+        self.focal = width / (2 * math.tan(math.radians(68) / 2))
+
+    def project(self, point):
+        relative = np.asarray(point, dtype=float) - self.position
+        depth = float(np.dot(relative, self.forward))
+        if depth < 0.8:
+            return None
+        x = float(np.dot(relative, self.right))
+        y = float(np.dot(relative, self.up))
+        return (
+            self.width / 2 + self.focal * x / depth,
+            self.height * 0.53 - self.focal * y / depth,
+            depth,
+        )
+
+
+def car_vertices(state):
+    heading = float(state.get("heading", 0.0))
+    length = float(state.get("length", 4.5))
+    width = float(state.get("width", 1.9))
+    height = float(state.get("height", 1.6))
+    center = np.array([state["center_x"], state["center_y"]])
+    forward = np.array([math.cos(heading), math.sin(heading)])
+    side = np.array([-forward[1], forward[0]])
+    footprint = [
+        center + forward * length / 2 + side * width / 2,
+        center + forward * length / 2 - side * width / 2,
+        center - forward * length / 2 - side * width / 2,
+        center - forward * length / 2 + side * width / 2,
+    ]
+    return [
+        np.array([point[0], point[1], z])
+        for z in (0.08, height)
+        for point in footprint
+    ]
+
+
+def shade(color, factor):
+    return tuple(max(0, min(255, int(channel * factor))) for channel in color)
+
+
+def car_faces(state, color, camera):
+    vertices = car_vertices(state)
+    faces = [
+        ([4, 5, 6, 7], shade(color, 1.12)),
+        ([0, 1, 5, 4], shade(color, 0.92)),
+        ([1, 2, 6, 5], shade(color, 0.72)),
+        ([2, 3, 7, 6], shade(color, 0.82)),
+        ([3, 0, 4, 7], shade(color, 0.68)),
+    ]
+    projected_faces = []
+    for indices, face_color in faces:
+        projected = [camera.project(vertices[index]) for index in indices]
+        if any(point is None for point in projected):
+            continue
+        depth = sum(point[2] for point in projected) / len(projected)
+        projected_faces.append((depth, [(point[0], point[1]) for point in projected], face_color))
+    return projected_faces
+
+
+def draw_ground(draw, camera, target_state):
+    heading = float(target_state.get("heading", 0.0))
+    forward = np.array([math.cos(heading), math.sin(heading)])
+    side = np.array([-forward[1], forward[0]])
+    center = np.array([target_state["center_x"], target_state["center_y"]])
+
+    for lateral in range(-40, 41, 5):
+        points = [
+            camera.project((*point, 0.0))
+            for point in (center + side * lateral + forward * distance for distance in range(-10, 91, 5))
+        ]
+        points = [(point[0], point[1]) for point in points if point]
+        if len(points) >= 2:
+            draw.line(points, fill=(48, 54, 58), width=1)
+
+    for distance in range(-5, 91, 5):
+        points = [
+            camera.project((*point, 0.0))
+            for point in (center + forward * distance + side * lateral for lateral in range(-40, 41, 4))
+        ]
+        points = [(point[0], point[1]) for point in points if point]
+        if len(points) >= 2:
+            draw.line(points, fill=(48, 54, 58), width=1)
+
+
+def draw_map(draw, camera, map_features, target_state):
+    target_xy = np.array([target_state["center_x"], target_state["center_y"]])
+    for feature_type, points in map_features:
+        visible = []
+        for point in points:
+            point_xy = np.array([point["x"], point["y"]])
+            if np.linalg.norm(point_xy - target_xy) > 105:
+                if len(visible) >= 2:
+                    draw.line(visible, fill=ROAD_COLORS[feature_type], width=2)
+                visible = []
+                continue
+            projected = camera.project((point["x"], point["y"], 0.05))
+            if projected:
+                visible.append((projected[0], projected[1]))
+        if len(visible) >= 2:
+            width = 3 if feature_type in {"road_edge", "road_line"} else 1
+            draw.line(visible, fill=ROAD_COLORS[feature_type], width=width, joint="curve")
+
+
+def render_frame(data, map_features, target_track_index, frame, width, height):
+    target_state = data["tracks"][target_track_index]["states"][frame]
+    camera = ChaseCamera(target_state, width, height)
+    image = Image.new("RGB", (width, height), (18, 23, 27))
+    draw = ImageDraw.Draw(image)
+
+    horizon = int(height * 0.48)
+    draw.rectangle((0, horizon, width, height), fill=(31, 36, 39))
+    draw_ground(draw, camera, target_state)
+    draw_map(draw, camera, map_features, target_state)
+
+    all_faces = []
+    target_xy = np.array([target_state["center_x"], target_state["center_y"]])
+    for track_index, track in enumerate(data["tracks"]):
+        states = track.get("states", [])
+        if frame >= len(states):
+            continue
+        state = states[frame]
+        if not state.get("valid"):
+            continue
+        distance = np.linalg.norm(
+            np.array([state["center_x"], state["center_y"]]) - target_xy
+        )
+        if distance > 75:
+            continue
+        if track_index == target_track_index:
+            color = (0, 190, 242)
+        elif track.get("object_type") == "TYPE_PEDESTRIAN":
+            color = (178, 102, 220)
+        else:
+            color = (222, 83, 58)
+        all_faces.extend(car_faces(state, color, camera))
+
+    for _, polygon, color in sorted(all_faces, key=lambda item: item[0], reverse=True):
+        draw.polygon(polygon, fill=color, outline=(235, 239, 242))
+
+    timestamp = data.get("timestamps_seconds", [])
+    seconds = timestamp[frame] if frame < len(timestamp) else frame / 10
+    draw.rounded_rectangle((18, 18, 440, 73), radius=6, fill=(9, 13, 16))
+    draw.text(
+        (32, 29),
+        f"Roundabout chase | track {target_track_index} | t={seconds:.1f}s",
+        fill=(238, 242, 244),
+        font=ImageFont.load_default(),
+    )
+    draw.text(
+        (32, 50),
+        "Waymo recorded trajectory",
+        fill=(0, 190, 242),
+        font=ImageFont.load_default(),
+    )
+    return image
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("json_file", type=Path)
+    parser.add_argument("--track-index", type=int, default=90)
+    parser.add_argument("--start-frame", type=int, default=0)
+    parser.add_argument("--end-frame", type=int, default=60)
+    parser.add_argument("--fps", type=int, default=10)
+    parser.add_argument("--width", type=int, default=1280)
+    parser.add_argument("--height", type=int, default=720)
+    parser.add_argument("--output", type=Path)
+    args = parser.parse_args()
+
+    with args.json_file.open("r", encoding="utf-8") as file:
+        data = json.load(file)
+
+    map_features = []
+    for feature in data.get("map_features", []):
+        feature_type, points = feature_geometry(feature)
+        if points:
+            map_features.append((feature_type, points))
+
+    scenario_id = data.get("scenario_id", args.json_file.stem)
+    output = args.output or Path("visualizations") / f"{scenario_id}_roundabout_follow_3d.mp4"
+    output.parent.mkdir(parents=True, exist_ok=True)
+    preview = output.with_suffix(".png")
+
+    frames = []
+    skipped_frames = []
+    for frame_index in range(args.start_frame, args.end_frame + 1):
+        target_states = data["tracks"][args.track_index].get("states", [])
+        if frame_index >= len(target_states):
+            skipped_frames.append(frame_index)
+            continue
+        target_state = target_states[frame_index]
+        if not target_state.get("valid") or "center_x" not in target_state:
+            skipped_frames.append(frame_index)
+            continue
+        frame = render_frame(
+            data,
+            map_features,
+            args.track_index,
+            frame_index,
+            args.width,
+            args.height,
+        )
+        if frame_index == args.start_frame:
+            frame.save(preview)
+        frames.append(np.asarray(frame))
+        if frame_index % 10 == 0:
+            print(f"rendered frame {frame_index:02d}/{args.end_frame}")
+
+    imageio.mimsave(output, frames, fps=args.fps, macro_block_size=16)
+    print(f"video: {output}")
+    print(f"preview: {preview}")
+    print(f"frames: {len(frames)}, duration: {len(frames) / args.fps:.1f}s")
+    if skipped_frames:
+        print(f"skipped invalid source frames: {skipped_frames}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/visualize_waymo_json.py b/scripts/visualize_waymo_json.py
new file mode 100644
index 0000000000..bcbadb431e
--- /dev/null
+++ b/scripts/visualize_waymo_json.py
@@ -0,0 +1,331 @@
+import argparse
+import json
+from pathlib import Path
+
+import imageio.v2 as imageio
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+
+
+ROAD_STYLES = {
+    "lane": ((150, 156, 162), 1),
+    "road_edge": ((55, 64, 72), 2),
+    "road_line": ((206, 165, 48), 2),
+    "crosswalk": ((124, 111, 176), 2),
+    "speed_bump": ((214, 111, 44), 2),
+    "stop_sign": ((191, 63, 47), 4),
+    "driveway": ((123, 139, 92), 1),
+}
+
+OBJECT_TYPE_MAP = {
+    "TYPE_VEHICLE": "vehicle",
+    "TYPE_PEDESTRIAN": "pedestrian",
+    "TYPE_CYCLIST": "cyclist",
+}
+
+
+def point_dict(point):
+    return {
+        "x": float(point.get("x", point.get("center_x", 0.0))),
+        "y": float(point.get("y", point.get("center_y", 0.0))),
+        "z": float(point.get("z", point.get("center_z", 0.0))),
+    }
+
+
+def convert_raw_waymo(data):
+    if "objects" in data and "roads" in data:
+        return data
+    if "tracks" not in data or "map_features" not in data:
+        raise ValueError("Unsupported JSON schema: expected objects/roads or tracks/map_features")
+
+    objects = []
+    for track in data["tracks"]:
+        states = track.get("states", [])
+        valid_states = [state for state in states if state.get("valid")]
+        dimensions = valid_states[0] if valid_states else (states[0] if states else {})
+        objects.append(
+            {
+                "id": track.get("id"),
+                "type": OBJECT_TYPE_MAP.get(track.get("object_type"), "vehicle"),
+                "position": [point_dict(state) for state in states],
+                "heading": [float(state.get("heading", 0.0)) for state in states],
+                "velocity": [
+                    {
+                        "x": float(state.get("velocity_x", 0.0)),
+                        "y": float(state.get("velocity_y", 0.0)),
+                        "z": 0.0,
+                    }
+                    for state in states
+                ],
+                "valid": [bool(state.get("valid")) for state in states],
+                "length": float(dimensions.get("length", 4.0)),
+                "width": float(dimensions.get("width", 2.0)),
+                "height": float(dimensions.get("height", 1.5)),
+                "goalPosition": point_dict(valid_states[-1]) if valid_states else {"x": 0, "y": 0, "z": 0},
+                "mark_as_expert": False,
+            }
+        )
+
+    roads = []
+    for feature_index, feature in enumerate(data["map_features"]):
+        feature_id = feature.get("id", feature_index)
+        for road_type in ROAD_STYLES:
+            if road_type not in feature:
+                continue
+            feature_data = feature[road_type]
+            geometry = feature_data.get("polyline") or feature_data.get("polygon") or []
+            if not geometry and feature_data.get("position"):
+                geometry = [feature_data["position"]]
+            roads.append(
+                {
+                    "id": feature_id,
+                    "map_element_id": feature_id,
+                    "type": road_type,
+                    "geometry": [point_dict(point) for point in geometry],
+                }
+            )
+            break
+
+    return {
+        "name": data.get("scenario_id", "waymo_scenario"),
+        "scenario_id": data.get("scenario_id"),
+        "objects": objects,
+        "roads": roads,
+        "timestamps_seconds": data.get("timestamps_seconds", []),
+        "metadata": {
+            "sdc_track_index": data.get("sdc_track_index"),
+            "tracks_to_predict": data.get("tracks_to_predict", []),
+        },
+    }
+
+
+def valid_at(obj, frame):
+    valid = obj.get("valid")
+    return valid is None or frame < len(valid) and bool(valid[frame])
+
+
+def point_at(obj, frame):
+    pos = obj.get("position", [])
+    if frame >= len(pos):
+        return None
+    return pos[frame]
+
+
+def bounds_for(data):
+    xs = []
+    ys = []
+    for road in data.get("roads", []):
+        for point in road.get("geometry", []):
+            xs.append(point["x"])
+            ys.append(point["y"])
+
+    for obj in data.get("objects", []):
+        for frame, point in enumerate(obj.get("position", [])):
+            if valid_at(obj, frame):
+                xs.append(point["x"])
+                ys.append(point["y"])
+
+    if not xs or not ys:
+        raise ValueError("Scenario has no drawable x/y coordinates")
+
+    return min(xs), max(xs), min(ys), max(ys)
+
+
+def make_transform(bounds, width, height, pad):
+    min_x, max_x, min_y, max_y = bounds
+    world_w = max(max_x - min_x, 1.0)
+    world_h = max(max_y - min_y, 1.0)
+    scale = min((width - 2 * pad) / world_w, (height - 2 * pad) / world_h)
+    extra_x = (width - 2 * pad - world_w * scale) / 2
+    extra_y = (height - 2 * pad - world_h * scale) / 2
+
+    def transform(x, y):
+        px = pad + extra_x + (x - min_x) * scale
+        py = height - pad - extra_y - (y - min_y) * scale
+        return px, py
+
+    return transform, scale
+
+
+def object_color(data, index, obj):
+    metadata = data.get("metadata", {})
+    predicted = {track.get("track_index") for track in metadata.get("tracks_to_predict", [])}
+    if index == metadata.get("sdc_track_index"):
+        return 25, 103, 210
+    if index in predicted or obj.get("mark_as_expert"):
+        return 191, 63, 47
+    if obj.get("type") in {"pedestrian", "cyclist"}:
+        return 141, 90, 194
+    return 44, 139, 87
+
+
+def draw_polyline(draw, points, transform, fill, width):
+    coords = []
+    for point in points:
+        x = point.get("x")
+        y = point.get("y")
+        if isinstance(x, (int, float)) and isinstance(y, (int, float)):
+            coords.append(transform(x, y))
+    if len(coords) >= 2:
+        draw.line(coords, fill=fill, width=width, joint="curve")
+
+
+def draw_object(draw, data, obj, index, frame, transform, scale, trails, highlight_index=None):
+    point = point_at(obj, frame)
+    if not point or not valid_at(obj, frame):
+        return
+
+    highlighted = index == highlight_index
+    color = (0, 173, 239) if highlighted else object_color(data, index, obj)
+    if trails:
+        trail = []
+        for i in range(frame + 1):
+            trail_point = point_at(obj, i)
+            if trail_point and valid_at(obj, i):
+                trail.append(trail_point)
+        if len(trail) > 1:
+            draw_polyline(draw, trail, transform, color + ((220 if highlighted else 95),), 4 if highlighted else 2)
+
+    cx, cy = transform(point["x"], point["y"])
+    length = max(5, obj.get("length", 4.0) * scale)
+    width = max(3, obj.get("width", 2.0) * scale)
+    heading_values = obj.get("heading", [])
+    heading = heading_values[frame] if frame < len(heading_values) else 0
+
+    forward = np.array([np.cos(-heading), np.sin(-heading)])
+    side = np.array([-forward[1], forward[0]])
+    center = np.array([cx, cy])
+    corners = [
+        center + forward * length / 2 + side * width / 2,
+        center + forward * length / 2 - side * width / 2,
+        center - forward * length / 2 - side * width / 2,
+        center - forward * length / 2 + side * width / 2,
+    ]
+    polygon = [tuple(corner) for corner in corners]
+    if highlighted:
+        halo = max(8, int(max(length, width) * 0.8))
+        draw.ellipse((cx - halo, cy - halo, cx + halo, cy + halo), outline=(0, 173, 239, 210), width=3)
+    draw.polygon(polygon, fill=color, outline=(255, 255, 255), width=2 if highlighted else 1)
+
+    nose = [
+        tuple(center + forward * length / 2),
+        tuple(center + forward * length * 0.22 + side * width * 0.28),
+        tuple(center + forward * length * 0.22 - side * width * 0.28),
+    ]
+    draw.polygon(nose, fill=(245, 247, 249))
+
+
+def render_frame(data, frame, transform, scale, width, height, trails, highlight_index=None):
+    image = Image.new("RGB", (width, height), (235, 231, 223))
+    overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay, "RGBA")
+
+    for road in data.get("roads", []):
+        color, line_width = ROAD_STYLES.get(road.get("type"), ((110, 110, 110), 1))
+        alpha = 125 if road.get("type") == "lane" else 185
+        draw_polyline(draw, road.get("geometry", []), transform, color + (alpha,), line_width)
+
+    for index, obj in enumerate(data.get("objects", [])):
+        draw_object(draw, data, obj, index, frame, transform, scale, trails, highlight_index)
+
+    timestamp_values = data.get("timestamps_seconds", [])
+    timestamp = timestamp_values[frame] if frame < len(timestamp_values) else frame / 10
+    label = f"{data.get('scenario_id', data.get('name', 'scenario'))} | frame {frame:02d} | t={timestamp:.1f}s"
+    if highlight_index is not None and highlight_index < len(data.get("objects", [])):
+        target = data["objects"][highlight_index]
+        label += f" | target track {highlight_index}, id {target.get('id')}"
+    ImageDraw.Draw(overlay).rounded_rectangle((10, 8, min(width - 10, 470), 34), radius=4, fill=(245, 247, 249, 220))
+    ImageDraw.Draw(overlay).text(
+        (16, 14),
+        label,
+        fill=(29, 37, 44, 230),
+        font=ImageFont.load_default(),
+    )
+    return Image.alpha_composite(image.convert("RGBA"), overlay).convert("RGB")
+
+
+def render_scenario(
+    path,
+    output_dir,
+    width,
+    height,
+    fps,
+    trails,
+    focus_center=None,
+    focus_radius=None,
+    highlight_index=None,
+    start_frame=0,
+    end_frame=None,
+    output_suffix="",
+):
+    with path.open("r", encoding="utf-8") as f:
+        data = convert_raw_waymo(json.load(f))
+
+    max_frame = max(len(obj.get("position", [])) for obj in data.get("objects", [])) - 1
+    end_frame = max_frame if end_frame is None else min(end_frame, max_frame)
+    start_frame = max(0, min(start_frame, end_frame))
+    if focus_center and focus_radius:
+        center_x, center_y = focus_center
+        bounds = (
+            center_x - focus_radius,
+            center_x + focus_radius,
+            center_y - focus_radius,
+            center_y + focus_radius,
+        )
+    else:
+        bounds = bounds_for(data)
+    transform, scale = make_transform(bounds, width, height, pad=40)
+    scenario_id = str(data.get("scenario_id") or path.stem)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    mp4_path = output_dir / f"{scenario_id}{output_suffix}.mp4"
+    png_path = output_dir / f"{scenario_id}{output_suffix}_frame{start_frame:03d}.png"
+
+    frames = []
+    for frame in range(start_frame, end_frame + 1):
+        image = render_frame(data, frame, transform, scale, width, height, trails, highlight_index)
+        if frame == start_frame:
+            image.save(png_path)
+        frames.append(np.asarray(image))
+
+    imageio.mimsave(mp4_path, frames, fps=fps)
+    return mp4_path, png_path, len(frames)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("json_files", nargs="+", type=Path)
+    parser.add_argument("--output-dir", type=Path, default=Path("visualizations"))
+    parser.add_argument("--width", type=int, default=1280)
+    parser.add_argument("--height", type=int, default=800)
+    parser.add_argument("--fps", type=int, default=10)
+    parser.add_argument("--no-trails", action="store_true")
+    parser.add_argument("--focus-center", nargs=2, type=float, metavar=("X", "Y"))
+    parser.add_argument("--focus-radius", type=float)
+    parser.add_argument("--highlight-track", type=int)
+    parser.add_argument("--start-frame", type=int, default=0)
+    parser.add_argument("--end-frame", type=int)
+    parser.add_argument("--output-suffix", default="")
+    args = parser.parse_args()
+
+    for path in args.json_files:
+        mp4_path, png_path, frame_count = render_scenario(
+            path=path,
+            output_dir=args.output_dir,
+            width=args.width,
+            height=args.height,
+            fps=args.fps,
+            trails=not args.no_trails,
+            focus_center=args.focus_center,
+            focus_radius=args.focus_radius,
+            highlight_index=args.highlight_track,
+            start_frame=args.start_frame,
+            end_frame=args.end_frame,
+            output_suffix=args.output_suffix,
+        )
+        print(f"Rendered {path.name}: {frame_count} frames")
+        print(f"  video: {mp4_path}")
+        print(f"  preview: {png_path}")
+
+
+if __name__ == "__main__":
+    main()