@@ -108,6 +108,9 @@ class ExperimentConfig:
108108 max_parallel : int = 64
109109 discard_episode_timeout : int = 240
110110 project_name : str = "werewolves_multi_model"
111+ # Random player split mode: at each episode start, randomly split the
112+ # good-side players between the first two trainable models (role-based assignment is ignored)
113+ random_player_split : bool = False
111114
112115 def __post_init__ (self ):
113116 # Validate that all trainable roles are from the same faction
@@ -150,26 +153,29 @@ def __init__(
150153 swarm_clients : Dict [str , SwarmClient ],
151154 opponent_model : str ,
152155 opponent_url : str ,
156+ random_player_split : bool = False ,
153157 ):
154158 self .model_configs = model_configs
155159 self .swarm_clients = swarm_clients
156160 self .opponent_model = opponent_model
157161 self .opponent_url = opponent_url
162+ self .random_player_split = random_player_split
158163
159164 # Build role -> model_id mapping (for roles without index constraints)
160165 self .role_to_model : Dict [str , str ] = {}
161166 # Build (role, index) -> model_id mapping (for indexed assignments)
162167 self .role_index_to_model : Dict [Tuple [str , int ], str ] = {}
163168
164- for mc in model_configs :
165- for role in mc .roles :
166- if mc .role_indices and role in mc .role_indices :
167- # Index-based assignment
168- for idx in mc .role_indices [role ]:
169- self .role_index_to_model [(role , idx )] = mc .model_id
170- else :
171- # Role-based assignment (all instances)
172- self .role_to_model [role ] = mc .model_id
169+ if not random_player_split :
170+ for mc in model_configs :
171+ for role in mc .roles :
172+ if mc .role_indices and role in mc .role_indices :
173+ # Index-based assignment
174+ for idx in mc .role_indices [role ]:
175+ self .role_index_to_model [(role , idx )] = mc .model_id
176+ else :
177+ # Role-based assignment (all instances)
178+ self .role_to_model [role ] = mc .model_id
173179
174180 def get_trainable_targets (self ) -> List [str ]:
175181 """Get all trainable roles across all models."""
@@ -216,17 +222,38 @@ async def execute(
216222 # Track which model each player uses
217223 player_to_model : Dict [int , str ] = {}
218224
225+ # For random_player_split mode: randomly assign good-side players to models
226+ player_to_model_split : Dict [int , str ] = {}
227+ if self .random_player_split :
228+ # Identify all good-side player indices
229+ good_player_indices = [i for i , role in enumerate (roles ) if role in GOOD_ROLES ]
230+ # Shuffle and split roughly 50/50 (integer division: with an odd count, the second model gets the extra player)
231+ np .random .shuffle (good_player_indices )
232+ half = len (good_player_indices ) // 2
233+ model_ids = [mc .model_id for mc in self .model_configs ]
234+ for i , player_idx in enumerate (good_player_indices ):
235+ # First `half` shuffled players -> model_ids[0] (M1), the rest -> model_ids[1] (M2); requires at least two model_configs
236+ model_id_for_player = model_ids [0 ] if i < half else model_ids [1 ]
237+ player_to_model_split [player_idx ] = model_id_for_player
238+ logger .info (f"Random player split: M1={ [p for p , m in player_to_model_split .items () if m == model_ids [0 ]]} , "
239+ f"M2={ [p for p , m in player_to_model_split .items () if m == model_ids [1 ]]} " )
240+
219241 # Initialize agents
220242 players = []
221243 for i , role in enumerate (roles ):
222244 # Get the index of this role instance (0, 1, 2 for werewolves, etc.)
223245 role_idx = role_counters .get (role , 0 )
224246 role_counters [role ] = role_idx + 1
225247
226- # Try to find model: first by (role, index), then by role only
227- model_id = self .role_index_to_model .get ((role , role_idx ))
228- if model_id is None :
229- model_id = self .role_to_model .get (role )
248+ # Determine model_id based on assignment mode
249+ if self .random_player_split :
250+ # In random split mode, use player-based assignment
251+ model_id = player_to_model_split .get (i )
252+ else :
253+ # Try to find model: first by (role, index), then by role only
254+ model_id = self .role_index_to_model .get ((role , role_idx ))
255+ if model_id is None :
256+ model_id = self .role_to_model .get (role )
230257
231258 if model_id is None :
232259 # Non-trainable role - use opponent model
@@ -326,6 +353,8 @@ def setup(self):
326353 lora_rank = mc .lora .rank if mc .lora .enabled else None ,
327354 lora_alpha = mc .lora .alpha if mc .lora .enabled else None ,
328355 lora_target_modules = mc .lora .target_modules if mc .lora .enabled else None ,
356+ lr = 3e-4 ,
357+ layered_summon = True ,
329358 )
330359
331360 self .jobs [mc .model_id ] = job
@@ -358,6 +387,7 @@ def run(self):
358387 swarm_clients = self .swarm_clients ,
359388 opponent_model = self .config .opponent_model ,
360389 opponent_url = self .config .opponent_url ,
390+ random_player_split = self .config .random_player_split ,
361391 )
362392
363393 def rollout (task : Task ):
@@ -405,7 +435,7 @@ def rollout(task: Task):
405435# Predefined Experiment Configurations
406436# ============================================================================
407437
408- VERSION = "v2 "
438+ VERSION = "v3 "
409439
410440
411441def get_exp1_config () -> ExperimentConfig :
@@ -529,35 +559,36 @@ def get_exp3_config() -> ExperimentConfig:
529559
530560def get_exp4_config () -> ExperimentConfig :
531561 """
532- Experiment 4: Two models with random 50/50 split of good roles .
533- - M1 (14B-LoRA): 50% random non-werewolf characters
534- - M2 (14B-LoRA): remaining 50% non-werewolf characters
562+ Experiment 4: Two models with random 50/50 split of good-side players .
563+ - M1 (14B-LoRA): randomly selected 50% of good-side players per episode
564+ - M2 (14B-LoRA): remaining 50% of good-side players
535565 - Opponents (235B): werewolf
536566
537- Role assignment is randomized per game.
567+ At the start of each episode, the 6 good-side players (3 villagers,
568+ 1 seer, 1 witch, 1 hunter) are randomly split: 3 players go to M1,
569+ 3 players go to M2. This is player-based, not role-based assignment.
538570 """
539- # For simplicity, we do a fixed 50/50 split here
540- # In practice, the split could be randomized per game
541571 return ExperimentConfig (
542572 model_configs = [
543573 ModelConfig (
544574 model_id = "M1" ,
545575 swarm_url = "http://localhost:10086" ,
546576 model_path = DEFAULT_MODEL_14B ,
547- roles = [ "villager" , "seer" ], # ~50% of good roles
577+ roles = GOOD_ROLES , # All good roles (for validation only)
548578 lora = LoraConfig (enabled = True , rank = 32 , alpha = 32 ),
549579 experiment_name = f"werewolves_exp4_m1_half_{ VERSION } " ,
550580 ),
551581 ModelConfig (
552582 model_id = "M2" ,
553583 swarm_url = "http://localhost:10087" ,
554584 model_path = DEFAULT_MODEL_14B ,
555- roles = [ "witch" , "hunter" ], # ~50% of good roles
585+ roles = GOOD_ROLES , # All good roles (for validation only)
556586 lora = LoraConfig (enabled = True , rank = 32 , alpha = 32 ),
557587 experiment_name = f"werewolves_exp4_m2_half_{ VERSION } " ,
558588 ),
559589 ],
560590 project_name = "werewolves_exp4_random_split" ,
591+ random_player_split = True , # Enable random player-based assignment
561592 )
562593
563594
0 commit comments