|
| 1 | +# Multi-agent "best launch" nightly training program config. |
| 2 | +# Derived from the oignons2 (emerge/temp_training) configuration at: |
| 3 | +# weights/oignons2/config.yaml |
| 4 | +# Adapted to NYU Greene cluster paths and resource shape. Multi-agent gigaflow |
| 5 | +# training over the 8 local CARLA maps with the oignons2 policy architecture, |
| 6 | +# reward shaping (conditioning + randomization on), and partner-blindness / |
| 7 | +# phantom-braking perturbations enabled. Keys here override |
| 8 | +# pufferlib/config/ocean/drive.ini. |
| 9 | +# |
| 10 | +# Launch via scripts/launch_nightly_best.sh (3 seeds, date-stamped). |
| 11 | + |
| 12 | +# Environment — multi-agent gigaflow over all 8 local CARLA towns |
| 13 | +env.simulation_mode: gigaflow |
| 14 | +env.map_dir: pufferlib/resources/drive/binaries/carla |
| 15 | +env.num_maps: 8 |
| 16 | +env.num_agents: 1600 |
| 17 | +env.min_agents_per_env: 1 |
| 18 | +env.max_agents_per_env: 150 |
| 19 | +env.use_map_cache: 1 |
| 20 | +env.scenario_length: 3840 |
| 21 | +env.resample_frequency: 38400 |
| 22 | +env.termination_mode: 1 |
| 23 | +env.inactive_agent_threshold: 0.4 |
| 24 | +env.dynamics_model: jerk |
| 25 | +env.target_type: dijkstra |
| 26 | +env.spawn_initial_speed: 0.0 |
| 27 | +env.dt: 0.1 |
| 28 | +env.traffic_light_behavior: 1 |
| 29 | +env.collision_behavior: 1 |
| 30 | +env.offroad_behavior: 1 |
| 31 | + |
| 32 | +# Goal setup — three sequential waypoints, route-based placement spaced 20–60 m apart |
| 33 | +env.num_target_waypoints: 3 |
| 34 | +env.min_waypoint_spacing: 20.0 |
| 35 | +env.max_waypoint_spacing: 60.0 |
| 36 | +env.goal_radius: 2.0 |
| 37 | +env.goal_speed: 3.0 |
| 38 | + |
| 39 | +# Observation shaping (matches oignons2) |
| 40 | +env.obs_slots_lane_n: 80 |
| 41 | +env.obs_slots_boundary_n: 80 |
| 42 | +env.obs_slots_partners_n: 16 |
| 43 | +env.obs_slots_traffic_controls_n: 4 |
| 44 | +env.obs_range_partner_m: 200.0 |
| 45 | +env.obs_range_road_front_m: 200.0 |
| 46 | +env.obs_range_road_behind_m: 40.0 |
| 47 | +env.obs_range_road_side_m: 50.0 |
| 48 | +env.obs_range_traffic_control_m: 100.0 |
| 49 | +env.obs_norm_xy_offset_m: 200.0 |
| 50 | +env.obs_norm_goal_offset_m: 200.0 |
| 51 | +env.obs_norm_road_seg_length_m: 10.0 |
| 52 | +env.obs_norm_road_seg_width_m: 5.0 |
| 53 | +env.obs_norm_veh_length_m: 15.0 |
| 54 | +env.obs_norm_veh_width_m: 10.0 |
| 55 | +env.obs_dropout_lane: 0.5 |
| 56 | +env.obs_dropout_boundary: 0.4 |
| 57 | + |
| 58 | +# Perturbations (on during training; eval's clean macro zeros these) |
| 59 | +env.partner_blindness_prob: 0.03 |
| 60 | +env.partner_blindness_trigger_prob: 0.05 |
| 61 | +env.phantom_braking_prob: 0.02 |
| 62 | +env.phantom_braking_trigger_prob: 0.02 |
| 63 | +env.phantom_braking_duration: 10 |
| 64 | + |
| 65 | +# Reward shaping (oignons2 weights + conditioning/randomization on) |
| 66 | +env.reward_conditioning: true |
| 67 | +env.reward_randomization: true |
| 68 | +env.reward_goal: 1.0 |
| 69 | +env.reward_collision: 1.5 |
| 70 | +env.reward_offroad: 1.5 |
| 71 | +env.reward_stop_line: 1.0 |
| 72 | +env.reward_comfort: 0.05 |
| 73 | +env.reward_lane_align: 0.025 |
| 74 | +env.reward_vel_align: 1.0 |
| 75 | +env.reward_lane_center: 0.005 |
| 76 | +env.reward_velocity: 0.0025 |
| 77 | +env.reward_reverse: 0.005 |
| 78 | +env.reward_timestep: 2.5e-05 |
| 79 | +env.reward_overspeed: 0.05 |
| 80 | + |
| 81 | +# Policy — 3x1024 backbone, split actor/critic, gigaflow encoder |
| 82 | +policy.input_size: 256 |
| 83 | +policy.backbone_hidden_size: 1024 |
| 84 | +policy.backbone_num_layers: 3 |
| 85 | +policy.actor_hidden_size: 1024 |
| 86 | +policy.actor_num_layers: 0 |
| 87 | +policy.critic_hidden_size: 1024 |
| 88 | +policy.critic_num_layers: 0 |
| 89 | +policy.split_network: true |
| 90 | +policy.encoder_gigaflow: true |
| 91 | +policy.dropout: 0.0 |
| 92 | + |
| 93 | +# Training — 125B timesteps, large minibatch, compiled model, bfloat16 precision |
| 94 | +train.total_timesteps: 125_000_000_000 |
| 95 | +train.learning_rate: 0.0005 |
| 96 | +train.minibatch_size: 153600 |
| 97 | +train.max_minibatch_size: 153600 |
| 98 | +train.update_epochs: 3 |
| 99 | +train.bptt_horizon: 128 |
| 100 | +train.compile: true |
| 101 | +train.precision: bfloat16 |
| 102 | +train.normalize_rewards: false |
| 103 | +train.checkpoint_interval: 500 |
| 104 | +train.optimizer: adamw |
| 105 | + |
| 106 | +# Eval — keep validation_gigaflow (CARLA sweep) inline, disable everything else |
| 107 | +# (validation_replay needs nuPlan bins; behaviors_* need labelled scene |
| 108 | +# categories not used in this nightly). Interval 250 keeps eval cost ~5% of |
| 109 | +# wall-clock instead of ~85%. |
| 110 | +eval.validation_defaults.interval: 250 |
| 111 | +eval.validation_replay.enabled: 0 |
| 112 | +eval.validation_gigaflow.render_backend: egl |
| 113 | +eval.behaviors_full_dir.enabled: 0 |
| 114 | +eval.behaviors_hard_stop.enabled: 0 |
| 115 | +eval.behaviors_highway_straight.enabled: 0 |
| 116 | +eval.behaviors_lane_change.enabled: 0 |
| 117 | +eval.behaviors_merge.enabled: 0 |
| 118 | +eval.behaviors_parked_cars.enabled: 0 |
| 119 | +eval.behaviors_roundabout.enabled: 0 |
| 120 | +eval.behaviors_stopped_traffic.enabled: 0 |
| 121 | +eval.behaviors_traffic_light_green.enabled: 0 |
| 122 | +eval.behaviors_traffic_light_stop.enabled: 0 |
| 123 | +eval.behaviors_unprotected_left.enabled: 0 |
| 124 | +eval.behaviors_unprotected_right.enabled: 0 |
| 125 | + |
| 126 | +# W&B — project nightly-multi-agent. The group name must not contain spaces: |
| 127 | +# submit_cluster.py joins the inner command without quoting arg values. |
| 128 | +wandb: True |
| 129 | +wandb_project: nightly-multi-agent |
| 130 | +wandb_group: Nightly_MultiAgent |
0 commit comments