Add layered_summon parameter to AgentJetJob and related configurations

binary-husky · binary-husky · commit 1bb02c836300 · 2026-04-16T14:13:13.000+08:00
diff --git a/ajet/copilot/job.py b/ajet/copilot/job.py
@@ -64,6 +64,7 @@ class AgentJetJob:
         lora_alpha: LoRA alpha scaling factor (default 16).
         lora_target_modules: Target modules for LoRA adaptation (default 'all-linear').
         lora_load_format: Load format for LoRA weights (default 'auto').
+        layered_summon: Enable layered summon for LoRA (default False; must be True when lora_rank > 0).
         gpu_memory_utilization: GPU memory utilization for vLLM engine (default 0.85).
         lr: Learning rate for optimizer (default 1e-6).
     """
@@ -93,6 +94,7 @@ def __init__(
         lora_alpha: int | None = None,
         lora_target_modules: str | None = None,
         lora_load_format: str | None = None,
+        layered_summon: bool | None = None,
         gpu_memory_utilization: float | None = None,
         lr: float | None = None,
     ) -> None:
@@ -136,6 +138,7 @@ def __init__(
         self.lora_alpha: int = cast(int, lora_alpha)
         self.lora_target_modules: str = cast(str, lora_target_modules)
         self.lora_load_format: str = cast(str, lora_load_format)
+        self.layered_summon: bool = cast(bool, layered_summon)
         self.gpu_memory_utilization: float = cast(float, gpu_memory_utilization)
         self.lr: float = cast(float, lr)
 
@@ -164,6 +167,7 @@ def __init__(
             "ajet.lora.lora_alpha":                         "lora_alpha",
             "ajet.lora.target_modules":                     "lora_target_modules",
             "ajet.lora.load_format":                        "lora_load_format",
+            "ajet.lora.layered_summon":                     "layered_summon",
             "ajet.rollout.gpu_memory_utilization":          "gpu_memory_utilization",
             "ajet.trainer_common.optim.lr":                 "lr",
         }
@@ -194,6 +198,8 @@ def __init__(
         if self.lora_rank > 0:
             if self.lora_load_format != "safetensors":
                 raise ValueError(f"When lora_rank > 0, lora_load_format must be 'safetensors', got '{self.lora_load_format}'")
+            if not self.layered_summon:
+                raise ValueError("When lora_rank > 0, layered_summon must be True")
             if self.lr is None:
                 raise ValueError("lr should be provided for lora training")
             if self.lr <= 1e-5:
diff --git a/ajet/default_config/ajet_config_schema.py b/ajet/default_config/ajet_config_schema.py
@@ -46,6 +46,7 @@ class AjetLora:
     lora_alpha: int = 16
     target_modules: str = "all-linear"
     load_format: str = "auto"
+    layered_summon: bool = False
 
 
 @dataclass
diff --git a/ajet/default_config/ajet_default.yaml b/ajet/default_config/ajet_default.yaml
@@ -316,6 +316,7 @@ ajet:
     lora_alpha: 16
     target_modules: all-linear
     load_format: auto
+    layered_summon: false
 
 
   # the experimental ZeroMQ interchange server feature that allows `tuner.as_oai_baseurl_apikey` feature
diff --git a/ajet/default_config/verl/config_auto_convertion_verl.jsonc b/ajet/default_config/verl/config_auto_convertion_verl.jsonc
@@ -35,6 +35,7 @@
     "ajet.lora.lora_alpha": "actor_rollout_ref.model.lora_alpha",
     "ajet.lora.target_modules": "actor_rollout_ref.model.target_modules",
     "ajet.lora.load_format": "actor_rollout_ref.rollout.load_format",
+    "ajet.lora.layered_summon": "actor_rollout_ref.rollout.layered_summon",
 
     "ajet.trainer_common.total_training_steps": "trainer.total_training_steps",
     "ajet.trainer_common.save_freq": "trainer.save_freq",
diff --git a/ajet/utils/config_utils.py b/ajet/utils/config_utils.py
@@ -183,15 +183,21 @@ def align_parameter_safe_guard(config: dict, backbone: str) -> dict:
     if backbone == "verl" and isinstance(config["trainer"]["logger"], str):
         config["trainer"]["logger"] = ["console", config["trainer"]["logger"]]
 
-    # special: LoRA requires safetensors load_format
+    # special: LoRA requires safetensors load_format and layered_summon
     if backbone == "verl":
         lora_rank = config.get("actor_rollout_ref", {}).get("model", {}).get("lora_rank", 0)
         load_format = config.get("actor_rollout_ref", {}).get("rollout", {}).get("load_format", "auto")
+        layered_summon = config.get("actor_rollout_ref", {}).get("rollout", {}).get("layered_summon", False)
         if lora_rank > 0 and load_format != "safetensors":
             raise ValueError(
                 f"LoRA training (lora_rank={lora_rank}) requires load_format='safetensors', "
                 f"but got load_format='{load_format}'. Please set `ajet.lora.load_format: safetensors` in your config."
             )
+        if lora_rank > 0 and not layered_summon:
+            raise ValueError(
+                f"LoRA training (lora_rank={lora_rank}) requires layered_summon=True, "
+                f"but got layered_summon={layered_summon}. Please set `ajet.lora.layered_summon: true` in your config."
+            )
 
     # special: trinity train_batch_size
     if backbone == "trinity":
diff --git a/tutorial/example_train_multi_model/trans_roll_lora.py b/tutorial/example_train_multi_model/trans_roll_lora.py
@@ -52,7 +52,9 @@ def main():
         lora_rank=32,
         lora_alpha=32,
         lora_load_format="safetensors",
+        layered_summon=True,
         lr=3e-4,
+
     )
 
     job_7b = AgentJetJob(
@@ -67,6 +69,7 @@ def main():
         lora_rank=32,
         lora_alpha=32,
         lora_load_format="safetensors",
+        layered_summon=True,
         lr=3e-4,
     )
 
diff --git a/tutorial/example_werewolves_swarm/agent_roll_v2.py b/tutorial/example_werewolves_swarm/agent_roll_v2.py
@@ -108,6 +108,9 @@ class ExperimentConfig:
     max_parallel: int = 64
     discard_episode_timeout: int = 240
     project_name: str = "werewolves_multi_model"
+    # Random player split mode: at each episode start, randomly split
+    # good-side players among trainable models (ignoring role-based assignment)
+    random_player_split: bool = False
 
     def __post_init__(self):
         # Validate that all trainable roles are from the same faction
@@ -150,26 +153,29 @@ def __init__(
         swarm_clients: Dict[str, SwarmClient],
         opponent_model: str,
         opponent_url: str,
+        random_player_split: bool = False,
     ):
         self.model_configs = model_configs
         self.swarm_clients = swarm_clients
         self.opponent_model = opponent_model
         self.opponent_url = opponent_url
+        self.random_player_split = random_player_split
 
         # Build role -> model_id mapping (for roles without index constraints)
         self.role_to_model: Dict[str, str] = {}
         # Build (role, index) -> model_id mapping (for indexed assignments)
         self.role_index_to_model: Dict[Tuple[str, int], str] = {}
 
-        for mc in model_configs:
-            for role in mc.roles:
-                if mc.role_indices and role in mc.role_indices:
-                    # Index-based assignment
-                    for idx in mc.role_indices[role]:
-                        self.role_index_to_model[(role, idx)] = mc.model_id
-                else:
-                    # Role-based assignment (all instances)
-                    self.role_to_model[role] = mc.model_id
+        if not random_player_split:
+            for mc in model_configs:
+                for role in mc.roles:
+                    if mc.role_indices and role in mc.role_indices:
+                        # Index-based assignment
+                        for idx in mc.role_indices[role]:
+                            self.role_index_to_model[(role, idx)] = mc.model_id
+                    else:
+                        # Role-based assignment (all instances)
+                        self.role_to_model[role] = mc.model_id
 
     def get_trainable_targets(self) -> List[str]:
         """Get all trainable roles across all models."""
@@ -216,17 +222,38 @@ async def execute(
         # Track which model each player uses
         player_to_model: Dict[int, str] = {}
 
+        # For random_player_split mode: randomly assign good-side players to models
+        player_to_model_split: Dict[int, str] = {}
+        if self.random_player_split:
+            # Identify all good-side player indices
+            good_player_indices = [i for i, role in enumerate(roles) if role in GOOD_ROLES]
+            # Shuffle, then split in half (on an odd count the second model gets the extra player)
+            np.random.shuffle(good_player_indices)
+            half = len(good_player_indices) // 2
+            model_ids = [mc.model_id for mc in self.model_configs]
+            for i, player_idx in enumerate(good_player_indices):
+                # First half -> M1, second half -> M2
+                model_id_for_player = model_ids[0] if i < half else model_ids[1]
+                player_to_model_split[player_idx] = model_id_for_player
+            logger.info(f"Random player split: M1={[p for p, m in player_to_model_split.items() if m == model_ids[0]]}, "
+                       f"M2={[p for p, m in player_to_model_split.items() if m == model_ids[1]]}")
+
         # Initialize agents
         players = []
         for i, role in enumerate(roles):
             # Get the index of this role instance (0, 1, 2 for werewolves, etc.)
             role_idx = role_counters.get(role, 0)
             role_counters[role] = role_idx + 1
 
-            # Try to find model: first by (role, index), then by role only
-            model_id = self.role_index_to_model.get((role, role_idx))
-            if model_id is None:
-                model_id = self.role_to_model.get(role)
+            # Determine model_id based on assignment mode
+            if self.random_player_split:
+                # In random split mode, use player-based assignment
+                model_id = player_to_model_split.get(i)
+            else:
+                # Try to find model: first by (role, index), then by role only
+                model_id = self.role_index_to_model.get((role, role_idx))
+                if model_id is None:
+                    model_id = self.role_to_model.get(role)
 
             if model_id is None:
                 # Non-trainable role - use opponent model
@@ -326,6 +353,8 @@ def setup(self):
                 lora_rank=mc.lora.rank if mc.lora.enabled else None,
                 lora_alpha=mc.lora.alpha if mc.lora.enabled else None,
                 lora_target_modules=mc.lora.target_modules if mc.lora.enabled else None,
+                lr=3e-4,
+                layered_summon=True,
             )
 
             self.jobs[mc.model_id] = job
@@ -358,6 +387,7 @@ def run(self):
             swarm_clients=self.swarm_clients,
             opponent_model=self.config.opponent_model,
             opponent_url=self.config.opponent_url,
+            random_player_split=self.config.random_player_split,
         )
 
         def rollout(task: Task):
@@ -405,7 +435,7 @@ def rollout(task: Task):
 # Predefined Experiment Configurations
 # ============================================================================
 
-VERSION = "v2"
+VERSION = "v3"
 
 
 def get_exp1_config() -> ExperimentConfig:
@@ -529,35 +559,36 @@ def get_exp3_config() -> ExperimentConfig:
 
 def get_exp4_config() -> ExperimentConfig:
     """
-    Experiment 4: Two models with random 50/50 split of good roles.
-    - M1 (14B-LoRA): 50% random non-werewolf characters
-    - M2 (14B-LoRA): remaining 50% non-werewolf characters
+    Experiment 4: Two models with random 50/50 split of good-side players.
+    - M1 (14B-LoRA): randomly selected 50% of good-side players per episode
+    - M2 (14B-LoRA): remaining 50% of good-side players
     - Opponents (235B): werewolf
 
-    Role assignment is randomized per game.
+    At the start of each episode, the 6 good-side players (3 villagers,
+    1 seer, 1 witch, 1 hunter) are randomly split: 3 players go to M1,
+    3 players go to M2. This is player-based, not role-based assignment.
     """
-    # For simplicity, we do a fixed 50/50 split here
-    # In practice, the split could be randomized per game
     return ExperimentConfig(
         model_configs=[
             ModelConfig(
                 model_id="M1",
                 swarm_url="http://localhost:10086",
                 model_path=DEFAULT_MODEL_14B,
-                roles=["villager", "seer"],  # ~50% of good roles
+                roles=GOOD_ROLES,  # All good roles (for validation only)
                 lora=LoraConfig(enabled=True, rank=32, alpha=32),
                 experiment_name=f"werewolves_exp4_m1_half_{VERSION}",
             ),
             ModelConfig(
                 model_id="M2",
                 swarm_url="http://localhost:10087",
                 model_path=DEFAULT_MODEL_14B,
-                roles=["witch", "hunter"],  # ~50% of good roles
+                roles=GOOD_ROLES,  # All good roles (for validation only)
                 lora=LoraConfig(enabled=True, rank=32, alpha=32),
                 experiment_name=f"werewolves_exp4_m2_half_{VERSION}",
             ),
         ],
         project_name="werewolves_exp4_random_split",
+        random_player_split=True,  # Enable random player-based assignment
     )