Skip to content

Commit 30ce34a

Browse files
Eugene Vinitsky and claude
authored and committed
nightly: add multi-agent yaml + launcher; date-scoped wandb groups
Mirror the existing single-agent nightly launcher with a multi-agent counterpart so both flavors can run on Greene with the same shape.

- scripts/cluster_configs/nightly_best.yaml — multi-agent (oignons2) config (720k agents, 8 maps, 10B steps, gigaflow encoder + split network).
- scripts/launch_nightly_best.sh — Greene launcher; same env-overridable shape as launch_single_agent.sh but defaults TIME=1800 / MEM=192gb to match the multi-agent profile.
- Both launchers add wandb_group=${DATE_STAMP} to --args so a night's 3 seeds cluster under one date-named wandb group.
- Yamls now point at split wandb projects:
    single_agent_speed_run.yaml -> nightly-single
    nightly_best.yaml -> nightly-multi
  Date-scoped grouping happens within each project.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 94dfba9 commit 30ce34a

4 files changed

Lines changed: 189 additions & 3 deletions

File tree

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Multi-agent "best launch" nightly training program config.
2+
# Derived from the oignons2 (emerge/temp_training) configuration at:
3+
# weights/oignons2/config.yaml
4+
# Adapted to NYU Greene cluster paths and resource shape. Multi-agent gigaflow
5+
# training over the 8 local CARLA maps with the oignons2 policy architecture,
6+
# reward shaping (conditioning + randomization on), and partner-blindness /
7+
# phantom-braking perturbations enabled. Keys here override
8+
# pufferlib/config/ocean/drive.ini.
9+
#
10+
# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped).
11+
12+
# Environment — multi-agent gigaflow over all 8 local CARLA towns
13+
env.simulation_mode: gigaflow
14+
env.map_dir: pufferlib/resources/drive/binaries/carla
15+
env.num_maps: 8
16+
env.num_agents: 720000
17+
env.min_agents_per_env: 1
18+
env.max_agents_per_env: 150
19+
env.use_map_cache: 1
20+
env.scenario_length: 1200
21+
# 0 disables periodic scenario resampling — every sub-env keeps the same map
22+
# for the full run instead of swapping every 38400 steps.
23+
env.resample_frequency: 0
24+
env.termination_mode: 1
25+
env.inactive_agent_threshold: 0.4
26+
env.dynamics_model: jerk
27+
env.target_type: static
28+
env.spawn_initial_speed: 0.0
29+
env.dt: 0.3
30+
env.traffic_light_behavior: 1
31+
env.collision_behavior: 1
32+
env.offroad_behavior: 1
33+
34+
# Goal setup — three sequential waypoints, route-based placement with spacing in [20 m, 60 m]
35+
env.num_target_waypoints: 3
36+
env.min_waypoint_spacing: 20.0
37+
env.max_waypoint_spacing: 60.0
38+
env.goal_radius: 2.0
39+
env.goal_speed: 3.0
40+
41+
# Observation shaping (matches oignons2)
42+
env.obs_slots_lane_n: 80
43+
env.obs_slots_boundary_n: 80
44+
env.obs_slots_partners_n: 16
45+
env.obs_slots_traffic_controls_n: 4
46+
env.obs_range_partner_m: 200.0
47+
env.obs_range_road_front_m: 200.0
48+
env.obs_range_road_behind_m: 40.0
49+
env.obs_range_road_side_m: 50.0
50+
env.obs_range_traffic_control_m: 100.0
51+
env.obs_norm_xy_offset_m: 200.0
52+
env.obs_norm_goal_offset_m: 200.0
53+
env.obs_norm_road_seg_length_m: 10.0
54+
env.obs_norm_road_seg_width_m: 5.0
55+
env.obs_norm_veh_length_m: 15.0
56+
env.obs_norm_veh_width_m: 10.0
57+
env.obs_dropout_lane: 0.5
58+
env.obs_dropout_boundary: 0.4
59+
60+
# Perturbations (on during training; eval's clean macro zeros these)
61+
env.partner_blindness_prob: 0.03
62+
env.partner_blindness_trigger_prob: 0.05
63+
env.phantom_braking_prob: 0.02
64+
env.phantom_braking_trigger_prob: 0.02
65+
env.phantom_braking_duration: 10
66+
67+
# Reward shaping (oignons2 weights + conditioning/randomization on)
68+
env.reward_conditioning: true
69+
env.reward_randomization: true
70+
env.reward_goal: 1.0
71+
env.reward_collision: 1.5
72+
env.reward_offroad: 1.5
73+
env.reward_stop_line: 1.0
74+
env.reward_comfort: 0.05
75+
env.reward_lane_align: 0.025
76+
env.reward_vel_align: 1.0
77+
env.reward_lane_center: 0.005
78+
env.reward_velocity: 0.0025
79+
env.reward_reverse: 0.005
80+
env.reward_timestep: 2.5e-05
81+
env.reward_overspeed: 0.05
82+
83+
# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder
84+
policy.input_size: 256
85+
policy.backbone_hidden_size: 1024
86+
policy.backbone_num_layers: 3
87+
policy.actor_hidden_size: 1024
88+
policy.actor_num_layers: 0
89+
policy.critic_hidden_size: 1024
90+
policy.critic_num_layers: 0
91+
policy.split_network: true
92+
policy.encoder_gigaflow: true
93+
policy.dropout: 0.0
94+
95+
# Training — 10B steps, large minibatch, compiled bfloat16
96+
train.total_timesteps: 10_000_000_000
97+
train.learning_rate: 0.0005
98+
train.minibatch_size: 153600
99+
train.max_minibatch_size: 153600
100+
train.update_epochs: 3
101+
train.bptt_horizon: 128
102+
train.compile: true
103+
train.precision: bfloat16
104+
train.normalize_rewards: false
105+
train.checkpoint_interval: 500
106+
train.optimizer: adamw
107+
108+
# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else
109+
# (validation_replay needs nuPlan bins; behaviors_* need labelled scene
110+
# categories not used in this nightly). Interval 250 keeps eval cost ~5% of
111+
# wall-clock instead of ~85%.
112+
eval.validation_defaults.interval: 250
113+
eval.validation_replay.enabled: 0
114+
eval.validation_gigaflow.render_backend: egl
115+
eval.behaviors_full_dir.enabled: 0
116+
eval.behaviors_hard_stop.enabled: 0
117+
eval.behaviors_highway_straight.enabled: 0
118+
eval.behaviors_lane_change.enabled: 0
119+
eval.behaviors_merge.enabled: 0
120+
eval.behaviors_parked_cars.enabled: 0
121+
eval.behaviors_roundabout.enabled: 0
122+
eval.behaviors_stopped_traffic.enabled: 0
123+
eval.behaviors_traffic_light_green.enabled: 0
124+
eval.behaviors_traffic_light_stop.enabled: 0
125+
eval.behaviors_unprotected_left.enabled: 0
126+
eval.behaviors_unprotected_right.enabled: 0
127+
128+
# W&B — group has no space (submit_cluster.py joins the inner command
129+
# without quoting arg values). Launchers (launch_nightly_best.sh and
130+
# Modal's nightly()) override wandb_group to today's date at launch so
131+
# runs cluster by night in the UI; the static value here is just the
132+
# fallback for ad-hoc invocations.
133+
wandb: True
134+
wandb_project: nightly-multi
135+
wandb_group: nightly-multi

scripts/cluster_configs/single_agent_speed_run.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ eval.behaviors_unprotected_right.enabled: 0
6767

6868
# W&B. Group has no space: submit_cluster.py joins the inner command into a
6969
# bash -c string without quoting arg values, so a space would split the arg.
70+
# Launchers (launch_single_agent.sh and Modal's nightly()) override
71+
# wandb_group to today's date at launch so runs cluster by night in the UI;
72+
# the static value here is just the fallback for ad-hoc invocations.
7073
wandb: True
71-
wandb_project: single_agent_nightly_test
72-
wandb_group: Nightly_Test
74+
wandb_project: nightly-single
75+
wandb_group: nightly-single

scripts/launch_nightly_best.sh

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/bin/bash
# Launch multi-agent "nightly best" training on the cluster via submit_cluster.py.
# Mirrors launch_single_agent.sh but uses the oignons2-derived nightly_best.yaml
# (multi-agent gigaflow over 8 CARLA towns, 10B total steps). Code-isolated per
# run, container-wrapped, gpu-heartbeated, date-stamped wandb run names and
# wandb group.
#
# Run on the login node, from the repository root (the config and
# submit_cluster.py paths below are relative). It sources the venv and submits
# from there:
#   ./scripts/launch_nightly_best.sh
#
# Overridable via the environment:
#   PROGRAM_CONFIG  program_config YAML (default: nightly_best.yaml)
#   COMPUTE_CONFIG  compute_config YAML (default: nyu_greene.yaml)
#   SEEDS           colon sweep passed to --args train.seed (default 0:1:2 -> 3 jobs)
#   ACCOUNT/PARTITION/TIME  SLURM overrides
#   MEM             SLURM --mem (default 192gb; the multi-agent config plus
#                   inline validation_gigaflow eval can spike past 128gb at
#                   epoch 250)
#   PREFIX          run-name prefix (default <date>_multi_agent)
#
# Examples:
#   SEEDS=0 ./scripts/launch_nightly_best.sh                   # one-seed dry run
#   PARTITION=h100_tandon ./scripts/launch_nightly_best.sh     # if h200 QOS is full
set -euo pipefail

# Read the clock exactly once. PREFIX, run_name and wandb_group are all derived
# from this value, so they cannot disagree when a launch straddles midnight.
DATE_STAMP="$(date +%Y-%m-%d)"

PROGRAM_CONFIG="${PROGRAM_CONFIG:-scripts/cluster_configs/nightly_best.yaml}"
COMPUTE_CONFIG="${COMPUTE_CONFIG:-scripts/cluster_configs/nyu_greene.yaml}"
ACCOUNT="${ACCOUNT:-torch_pr_924_tandon_advanced}"
PARTITION="${PARTITION:-h200_tandon}"
TIME="${TIME:-1800}"
MEM="${MEM:-192gb}"
SEEDS="${SEEDS:-0:1:2}"
PREFIX="${PREFIX:-${DATE_STAMP}_multi_agent}"

source "/scratch/$USER/venvs/pufferdrive/bin/activate"

# Split the colon-separated sweep into one entry per seed.
IFS=':' read -ra SEED_LIST <<< "$SEEDS"

# Reject empty tokens (e.g. SEEDS=0::2) before submitting anything, so a typo
# cannot queue a job with a blank train.seed next to valid ones.
for SEED in "${SEED_LIST[@]}"; do
  if [[ -z "$SEED" ]]; then
    printf 'error: empty seed in SEEDS=%s\n' "$SEEDS" >&2
    exit 1
  fi
done

# One submission per seed so we can pass a per-seed run_name (wandb display
# name like 2026-05-31_seed0). wandb_group is the bare date so all of a night's
# seeds land in the same group.
for SEED in "${SEED_LIST[@]}"; do
  python scripts/submit_cluster.py \
    --save_dir "/scratch/$USER/runs" \
    --prefix "$PREFIX" \
    --compute_config "$COMPUTE_CONFIG" \
    --program_config "$PROGRAM_CONFIG" \
    --container --heartbeat \
    --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" --mem "$MEM" \
    --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}"
done

scripts/launch_single_agent.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,5 @@ for SEED in "${SEED_LIST[@]}"; do
4141
--program_config "$PROGRAM_CONFIG" \
4242
--container --heartbeat \
4343
--account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" \
44-
--args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}"
44+
--args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}" "wandb_group=${DATE_STAMP}"
4545
done

0 commit comments

Comments
 (0)