|
| 1 | +# Multi-agent "best launch" nightly training program config. |
| 2 | +# Derived from the oignons2 (emerge/temp_training) configuration at: |
| 3 | +# weights/oignons2/config.yaml |
| 4 | +# Adapted to NYU Greene cluster paths and resource shape. Multi-agent gigaflow |
| 5 | +# training over the 8 local CARLA maps with the oignons2 policy architecture, |
| 6 | +# reward shaping (conditioning + randomization on), and partner-blindness / |
| 7 | +# phantom-braking perturbations enabled. Keys here override |
| 8 | +# pufferlib/config/ocean/drive.ini. |
| 9 | +# |
| 10 | +# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped). |
| 11 | + |
| 12 | +# Environment — multi-agent gigaflow over all 8 local CARLA towns |
| 13 | +env.simulation_mode: gigaflow |
| 14 | +env.map_dir: pufferlib/resources/drive/binaries/carla |
| 15 | +env.num_maps: 8 |
| 16 | +env.num_agents: 720000 |
| 17 | +env.min_agents_per_env: 1 |
| 18 | +env.max_agents_per_env: 150 |
| 19 | +env.use_map_cache: 1 |
| 20 | +env.scenario_length: 1200 |
| 21 | +# 0 disables periodic scenario resampling — every sub-env keeps the same map |
| 22 | +# for the full run instead of swapping every 38400 steps. |
| 23 | +env.resample_frequency: 0 |
| 24 | +env.termination_mode: 1 |
| 25 | +env.inactive_agent_threshold: 0.4 |
| 26 | +env.dynamics_model: jerk |
| 27 | +env.target_type: static |
| 28 | +env.spawn_initial_speed: 0.0 |
| 29 | +env.dt: 0.3 |
| 30 | +env.traffic_light_behavior: 1 |
| 31 | +env.collision_behavior: 1 |
| 32 | +env.offroad_behavior: 1 |
| 33 | + |
| 34 | +# Goal setup — three sequential waypoints, route-based placement with 20–60 m spacing |
| 35 | +env.num_target_waypoints: 3 |
| 36 | +env.min_waypoint_spacing: 20.0 |
| 37 | +env.max_waypoint_spacing: 60.0 |
| 38 | +env.goal_radius: 2.0 |
| 39 | +env.goal_speed: 3.0 |
| 40 | + |
| 41 | +# Observation shaping (matches oignons2) |
| 42 | +env.obs_slots_lane_n: 80 |
| 43 | +env.obs_slots_boundary_n: 80 |
| 44 | +env.obs_slots_partners_n: 16 |
| 45 | +env.obs_slots_traffic_controls_n: 4 |
| 46 | +env.obs_range_partner_m: 200.0 |
| 47 | +env.obs_range_road_front_m: 200.0 |
| 48 | +env.obs_range_road_behind_m: 40.0 |
| 49 | +env.obs_range_road_side_m: 50.0 |
| 50 | +env.obs_range_traffic_control_m: 100.0 |
| 51 | +env.obs_norm_xy_offset_m: 200.0 |
| 52 | +env.obs_norm_goal_offset_m: 200.0 |
| 53 | +env.obs_norm_road_seg_length_m: 10.0 |
| 54 | +env.obs_norm_road_seg_width_m: 5.0 |
| 55 | +env.obs_norm_veh_length_m: 15.0 |
| 56 | +env.obs_norm_veh_width_m: 10.0 |
| 57 | +env.obs_dropout_lane: 0.5 |
| 58 | +env.obs_dropout_boundary: 0.4 |
| 59 | + |
| 60 | +# Perturbations (on during training; eval's clean macro zeros these) |
| 61 | +env.partner_blindness_prob: 0.03 |
| 62 | +env.partner_blindness_trigger_prob: 0.05 |
| 63 | +env.phantom_braking_prob: 0.02 |
| 64 | +env.phantom_braking_trigger_prob: 0.02 |
| 65 | +env.phantom_braking_duration: 10 |
| 66 | + |
| 67 | +# Reward shaping (oignons2 weights + conditioning/randomization on) |
| 68 | +env.reward_conditioning: true |
| 69 | +env.reward_randomization: true |
| 70 | +env.reward_goal: 1.0 |
| 71 | +env.reward_collision: 1.5 |
| 72 | +env.reward_offroad: 1.5 |
| 73 | +env.reward_stop_line: 1.0 |
| 74 | +env.reward_comfort: 0.05 |
| 75 | +env.reward_lane_align: 0.025 |
| 76 | +env.reward_vel_align: 1.0 |
| 77 | +env.reward_lane_center: 0.005 |
| 78 | +env.reward_velocity: 0.0025 |
| 79 | +env.reward_reverse: 0.005 |
| 80 | +env.reward_timestep: 2.5e-05 |
| 81 | +env.reward_overspeed: 0.05 |
| 82 | + |
| 83 | +# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder |
| 84 | +policy.input_size: 256 |
| 85 | +policy.backbone_hidden_size: 1024 |
| 86 | +policy.backbone_num_layers: 3 |
| 87 | +policy.actor_hidden_size: 1024 |
| 88 | +policy.actor_num_layers: 0 |
| 89 | +policy.critic_hidden_size: 1024 |
| 90 | +policy.critic_num_layers: 0 |
| 91 | +policy.split_network: true |
| 92 | +policy.encoder_gigaflow: true |
| 93 | +policy.dropout: 0.0 |
| 94 | + |
| 95 | +# Training — 10B steps, large minibatch, compiled, bfloat16 precision |
| 96 | +train.total_timesteps: 10_000_000_000 |
| 97 | +train.learning_rate: 0.0005 |
| 98 | +train.minibatch_size: 153600 |
| 99 | +train.max_minibatch_size: 153600 |
| 100 | +train.update_epochs: 3 |
| 101 | +train.bptt_horizon: 128 |
| 102 | +train.compile: true |
| 103 | +train.precision: bfloat16 |
| 104 | +train.normalize_rewards: false |
| 105 | +train.checkpoint_interval: 500 |
| 106 | +train.optimizer: adamw |
| 107 | + |
| 108 | +# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else |
| 109 | +# (validation_replay needs nuPlan bins; behaviors_* need labelled scene |
| 110 | +# categories not used in this nightly). Interval 250 keeps eval cost ~5% of |
| 111 | +# wall-clock instead of ~85%. |
| 112 | +eval.validation_defaults.interval: 250 |
| 113 | +eval.validation_replay.enabled: 0 |
| 114 | +eval.validation_gigaflow.render_backend: egl |
| 115 | +eval.behaviors_full_dir.enabled: 0 |
| 116 | +eval.behaviors_hard_stop.enabled: 0 |
| 117 | +eval.behaviors_highway_straight.enabled: 0 |
| 118 | +eval.behaviors_lane_change.enabled: 0 |
| 119 | +eval.behaviors_merge.enabled: 0 |
| 120 | +eval.behaviors_parked_cars.enabled: 0 |
| 121 | +eval.behaviors_roundabout.enabled: 0 |
| 122 | +eval.behaviors_stopped_traffic.enabled: 0 |
| 123 | +eval.behaviors_traffic_light_green.enabled: 0 |
| 124 | +eval.behaviors_traffic_light_stop.enabled: 0 |
| 125 | +eval.behaviors_unprotected_left.enabled: 0 |
| 126 | +eval.behaviors_unprotected_right.enabled: 0 |
| 127 | + |
| 128 | +# W&B — group name must not contain spaces (submit_cluster.py joins the inner command |
| 129 | +# without quoting arg values). Launchers (launch_nightly_best.sh and |
| 130 | +# Modal's nightly()) override wandb_group to today's date at launch so |
| 131 | +# runs cluster by night in the UI; the static value here is just the |
| 132 | +# fallback for ad-hoc invocations. |
| 133 | +wandb: True |
| 134 | +wandb_project: nightly-multi |
| 135 | +wandb_group: nightly-multi |
0 commit comments