Skip to content

Commit b232b7e

Browse files
Eugene Vinitsky and claude
authored and committed
nightly_best_launch: multi-agent nightly derived from oignons2
Adds scripts/cluster_configs/nightly_best_launch.yaml and scripts/launch_nightly_best.sh — a multi-agent nightly mirroring the single_agent_speed_run/launch_single_agent pair but with the oignons2 (emerge/temp_training/weights/oignons2/config.yaml) policy + reward + perturbation config: 1600 agents across 8 CARLA towns, 3x1024 split network with gigaflow encoder, reward conditioning + randomization + partner blindness + phantom braking on, route-based [20, 60m] goals. Defaults wandb_project=nightly-multi-agent, 3 seeds, --mem 192gb. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 9f740ed commit b232b7e

2 files changed

Lines changed: 171 additions & 0 deletions

File tree

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Multi-agent "best launch" nightly training program config.
2+
# Derived from the oignons2 (emerge/temp_training) configuration at:
3+
# weights/oignons2/config.yaml
4+
# Adapted to NYU Greene cluster paths and resource shape. Multi-agent gigaflow
5+
# training over the 8 local CARLA maps with the oignons2 policy architecture,
6+
# reward shaping (conditioning + randomization on), and partner-blindness /
7+
# phantom-braking perturbations enabled. Keys here override
8+
# pufferlib/config/ocean/drive.ini.
9+
#
10+
# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped).
11+
12+
# Environment — multi-agent gigaflow over all 8 local CARLA towns
13+
env.simulation_mode: gigaflow
14+
env.map_dir: pufferlib/resources/drive/binaries/carla
15+
env.num_maps: 8
16+
env.num_agents: 1600
17+
env.min_agents_per_env: 1
18+
env.max_agents_per_env: 150
19+
env.use_map_cache: 1
20+
env.scenario_length: 3840
21+
env.resample_frequency: 38400
22+
env.termination_mode: 1
23+
env.inactive_agent_threshold: 0.4
24+
env.dynamics_model: jerk
25+
env.target_type: dijkstra
26+
env.spawn_initial_speed: 0.0
27+
env.dt: 0.1
28+
env.traffic_light_behavior: 1
29+
env.collision_behavior: 1
30+
env.offroad_behavior: 1
31+
32+
# Goal setup — three sequential waypoints, route-based placement spaced 20–60 m apart
33+
env.num_target_waypoints: 3
34+
env.min_waypoint_spacing: 20.0
35+
env.max_waypoint_spacing: 60.0
36+
env.goal_radius: 2.0
37+
env.goal_speed: 3.0
38+
39+
# Observation shaping (matches oignons2)
40+
env.obs_slots_lane_n: 80
41+
env.obs_slots_boundary_n: 80
42+
env.obs_slots_partners_n: 16
43+
env.obs_slots_traffic_controls_n: 4
44+
env.obs_range_partner_m: 200.0
45+
env.obs_range_road_front_m: 200.0
46+
env.obs_range_road_behind_m: 40.0
47+
env.obs_range_road_side_m: 50.0
48+
env.obs_range_traffic_control_m: 100.0
49+
env.obs_norm_xy_offset_m: 200.0
50+
env.obs_norm_goal_offset_m: 200.0
51+
env.obs_norm_road_seg_length_m: 10.0
52+
env.obs_norm_road_seg_width_m: 5.0
53+
env.obs_norm_veh_length_m: 15.0
54+
env.obs_norm_veh_width_m: 10.0
55+
env.obs_dropout_lane: 0.5
56+
env.obs_dropout_boundary: 0.4
57+
58+
# Perturbations (on during training; eval's clean macro zeros these)
59+
env.partner_blindness_prob: 0.03
60+
env.partner_blindness_trigger_prob: 0.05
61+
env.phantom_braking_prob: 0.02
62+
env.phantom_braking_trigger_prob: 0.02
63+
env.phantom_braking_duration: 10
64+
65+
# Reward shaping (oignons2 weights + conditioning/randomization on)
66+
env.reward_conditioning: true
67+
env.reward_randomization: true
68+
env.reward_goal: 1.0
69+
env.reward_collision: 1.5
70+
env.reward_offroad: 1.5
71+
env.reward_stop_line: 1.0
72+
env.reward_comfort: 0.05
73+
env.reward_lane_align: 0.025
74+
env.reward_vel_align: 1.0
75+
env.reward_lane_center: 0.005
76+
env.reward_velocity: 0.0025
77+
env.reward_reverse: 0.005
78+
env.reward_timestep: 2.5e-05
79+
env.reward_overspeed: 0.05
80+
81+
# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder
82+
policy.input_size: 256
83+
policy.backbone_hidden_size: 1024
84+
policy.backbone_num_layers: 3
85+
policy.actor_hidden_size: 1024
86+
policy.actor_num_layers: 0
87+
policy.critic_hidden_size: 1024
88+
policy.critic_num_layers: 0
89+
policy.split_network: true
90+
policy.encoder_gigaflow: true
91+
policy.dropout: 0.0
92+
93+
# Training — 125B steps, large minibatch, compiled bfloat16
94+
train.total_timesteps: 125_000_000_000
95+
train.learning_rate: 0.0005
96+
train.minibatch_size: 153600
97+
train.max_minibatch_size: 153600
98+
train.update_epochs: 3
99+
train.bptt_horizon: 128
100+
train.compile: true
101+
train.precision: bfloat16
102+
train.normalize_rewards: false
103+
train.checkpoint_interval: 500
104+
train.optimizer: adamw
105+
106+
# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else
107+
# (validation_replay needs nuPlan bins; behaviors_* need labelled scene
108+
# categories not used in this nightly). Interval 250 keeps eval cost ~5% of
109+
# wall-clock instead of ~85%.
110+
eval.validation_defaults.interval: 250
111+
eval.validation_replay.enabled: 0
112+
eval.validation_gigaflow.render_backend: egl
113+
eval.behaviors_full_dir.enabled: 0
114+
eval.behaviors_hard_stop.enabled: 0
115+
eval.behaviors_highway_straight.enabled: 0
116+
eval.behaviors_lane_change.enabled: 0
117+
eval.behaviors_merge.enabled: 0
118+
eval.behaviors_parked_cars.enabled: 0
119+
eval.behaviors_roundabout.enabled: 0
120+
eval.behaviors_stopped_traffic.enabled: 0
121+
eval.behaviors_traffic_light_green.enabled: 0
122+
eval.behaviors_traffic_light_stop.enabled: 0
123+
eval.behaviors_unprotected_left.enabled: 0
124+
eval.behaviors_unprotected_right.enabled: 0
125+
126+
# W&B — project nightly-multi-agent. The group name must not contain spaces:
127+
# submit_cluster.py joins the inner command without quoting arg values.
128+
wandb: True
129+
wandb_project: nightly-multi-agent
130+
wandb_group: Nightly_MultiAgent

scripts/launch_nightly_best.sh

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Launch multi-agent "best launch" nightly training on the cluster via
# submit_cluster.py. Derived from oignons2 (emerge/temp_training); see
# scripts/cluster_configs/nightly_best_launch.yaml for the config.
# One submission per seed (three by default), date-stamped wandb run names.
#
# Run on the login node from the repository root (the script activates the
# venv and invokes scripts/submit_cluster.py by relative path):
#   ./scripts/launch_nightly_best.sh
#
# Overridable via the environment:
#   PROGRAM_CONFIG   default: scripts/cluster_configs/nightly_best_launch.yaml
#   COMPUTE_CONFIG   default: scripts/cluster_configs/nyu_greene.yaml
#   SEEDS            colon-separated seeds; one job is submitted per seed,
#                    each passed as --args train.seed=<seed> (default 0:1:2)
#   ACCOUNT/PARTITION/TIME/MEM  SLURM overrides
#   PREFIX           run-name prefix (default <date>_multi_agent)
set -euo pipefail

# Read the date once so the default PREFIX and the per-seed run names cannot
# disagree when the launch straddles midnight.
DATE_STAMP="$(date +%Y-%m-%d)"

PROGRAM_CONFIG="${PROGRAM_CONFIG:-scripts/cluster_configs/nightly_best_launch.yaml}"
COMPUTE_CONFIG="${COMPUTE_CONFIG:-scripts/cluster_configs/nyu_greene.yaml}"
ACCOUNT="${ACCOUNT:-torch_pr_924_tandon_advanced}"
PARTITION="${PARTITION:-h200_tandon}"
TIME="${TIME:-1800}"
MEM="${MEM:-192gb}"
SEEDS="${SEEDS:-0:1:2}"
PREFIX="${PREFIX:-${DATE_STAMP}_multi_agent}"

# Split the seed list and validate every entry before anything is submitted:
# a malformed SEEDS (e.g. "0::2" or "0: 1") would otherwise launch a job with
# an empty or whitespace-containing train.seed / run_name argument.
IFS=':' read -ra SEED_LIST <<< "$SEEDS"
for SEED in "${SEED_LIST[@]}"; do
  if [[ ! "$SEED" =~ ^[0-9]+$ ]]; then
    printf 'launch_nightly_best: invalid seed %q in SEEDS=%q (expected colon-separated non-negative integers)\n' \
      "$SEED" "$SEEDS" >&2
    exit 2
  fi
done

# shellcheck source=/dev/null
source "/scratch/$USER/venvs/pufferdrive/bin/activate"

# One submission per seed so we can pass a per-seed run_name (wandb display
# name like 2026-06-01_seed0)
for SEED in "${SEED_LIST[@]}"; do
  python scripts/submit_cluster.py \
    --save_dir "/scratch/$USER/runs" \
    --prefix "$PREFIX" \
    --compute_config "$COMPUTE_CONFIG" \
    --program_config "$PROGRAM_CONFIG" \
    --container --heartbeat \
    --account "$ACCOUNT" --partition "$PARTITION" --time "$TIME" --mem "$MEM" \
    --args "train.seed=$SEED" "run_name=${DATE_STAMP}_seed${SEED}"
done

0 commit comments

Comments
 (0)