Fix pre-commit failures

binary-husky · binary-husky · commit c68c154f9653 · 2026-05-26T17:07:30.000+08:00
diff --git a/ajet/backbone/verl/core_algos.py b/ajet/backbone/verl/core_algos.py
@@ -187,7 +187,6 @@ def update(self, current_kl, n_steps):
             current_kl (float): Current KL divergence value (unused).
             n_steps (int): Number of steps taken (unused).
         """
-        pass
 
 
 def get_kl_controller(kl_ctrl):
diff --git a/ajet/context_tracker/multiagent_tracking.py b/ajet/context_tracker/multiagent_tracking.py
@@ -2,7 +2,7 @@
 import copy
 import json
 from dataclasses import dataclass, field
-from typing import List, Tuple
+from typing import List, Tuple, cast
 
 from beast_logger import NestedJsonItem, SeqItem, print_dict, print_nested
 from loguru import logger
@@ -173,12 +173,14 @@ def step_spawn_timeline(self, messages: List[dict], tools: List = [], disable_to
 
             any_later_msg_has_user_role = any((m["role"] == "user") for m in messages[i+1:])
 
+            msg_content = cast(str, msg["content"])
+
             # extract content block from openai-competible messages and convert to ExtendedMessage
             timeline += [
                 ExtendedMessage(
                     author=author,
                     role=msg["role"],
-                    content=msg["content"],
+                    content=msg_content,
                     tokenizer=self.tokenizer,
                     tools=tools,
                     tool_calls=(msg["tool_calls"] if "tool_calls" in msg else []),
@@ -189,7 +191,7 @@ def step_spawn_timeline(self, messages: List[dict], tools: List = [], disable_to
                     before_last_query=any_later_msg_has_user_role
                 )
             ]
-            if ("<think>" in msg["content"]) and (not previous_message_encounter_user_role):
+            if ("<think>" in msg_content) and (not previous_message_encounter_user_role):
                 logger.warning(f"Warning! Message content contains <think> tag, but no prior message has `user` role! This is not a common scenario. Please check your agent loop carefully.")
 
         return timeline
diff --git a/ajet/copilot/create-keep-think-model-chat-template/SKILL.md b/ajet/copilot/create-keep-think-model-chat-template/SKILL.md
@@ -12,5 +12,3 @@ You must not do this in-place, instead, please create another model.
 E.g., "/mnt/data_cpfs/xielipeng.xlp/models/Qwen3-8B" -> "/mnt/data_cpfs/xielipeng.xlp/models/Qwen3-8B-Keep-History"
 For all files within the original model path, please create symbolic links instead of copying files.
 With only one exception, the tokenizer config file, which should be copied and modified to change the chat template.
-
-
diff --git a/ajet/tokenizer/service.py b/ajet/tokenizer/service.py
@@ -20,7 +20,7 @@
 import time
 from collections import OrderedDict
 from concurrent.futures import Future, ThreadPoolExecutor
-from typing import Any, Optional
+from typing import Optional
 
 import msgpack
 import zmq
diff --git a/ajet/tuner_lib/experimental/swarm_server.py b/ajet/tuner_lib/experimental/swarm_server.py
@@ -14,7 +14,7 @@
 from ajet.utils.process_killer import kill_process_tree
 from ajet.utils.sync_train_code import extract_ajet_zip
 from ajet.tuner_lib.experimental.swarm_overwatch_utils import CurrentBatchRolloutPoolInformation
-from ajet.tuner_lib.experimental.interchange_utils import DEBUG, VERBOSE, CLIENT_ACTIVE_TIMEOUT
+from ajet.tuner_lib.experimental.interchange_utils import DEBUG, VERBOSE
 from ajet.tuner_lib.experimental.interchange_utils import (
     SyncTrainConfigRequest,
     ClaimEpisodeRequest,
diff --git a/ajet/utils/advantage.py b/ajet/utils/advantage.py
@@ -1,3 +1,15 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+import torch
+
+if TYPE_CHECKING:
+    from verl.trainer.config import AlgoConfig
+
+
 def compute_grpo_outcome_advantage(
     token_level_rewards: torch.Tensor,
     response_mask: torch.Tensor,
diff --git a/tutorial/example_appworld_swarm/agent_roll_timeline_study_text_level_tl.py b/tutorial/example_appworld_swarm/agent_roll_timeline_study_text_level_tl.py
@@ -12,11 +12,8 @@
 """
 
 import os
-import statistics
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Generator, List
 
-from tqdm import tqdm
 
 from ajet.copilot.job import AgentJetJob
 from ajet.schema.task import Task
diff --git a/tutorial/example_appworld_swarm/agent_roll_timeline_study_token_level_tl.py b/tutorial/example_appworld_swarm/agent_roll_timeline_study_token_level_tl.py
@@ -12,11 +12,8 @@
 """
 
 import os
-import statistics
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Generator, List
 
-from tqdm import tqdm
 
 from ajet.copilot.job import AgentJetJob
 from ajet.schema.task import Task
diff --git a/tutorial/example_appworld_swarm/agent_roll_timeline_study_token_level_tl_qwen3_original.py b/tutorial/example_appworld_swarm/agent_roll_timeline_study_token_level_tl_qwen3_original.py
@@ -12,11 +12,8 @@
 """
 
 import os
-import statistics
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Generator, List
 
-from tqdm import tqdm
 
 from ajet.copilot.job import AgentJetJob
 from ajet.schema.task import Task
diff --git a/tutorial/example_cocktail_rl_v2/train_aime_as_swarm_client_1.py b/tutorial/example_cocktail_rl_v2/train_aime_as_swarm_client_1.py
@@ -27,6 +27,12 @@
 _THIS_DIR = os.path.dirname(__file__)
 
 
+def _scalar_reward(reward: float | list[float] | None) -> float:
+    if not isinstance(reward, (int, float)):
+        raise TypeError(f"Expected scalar reward, got {type(reward).__name__}: {reward!r}")
+    return float(reward)
+
+
 @dataclass
 class _AimeAgentConfig:
     """Duck-types the subset of AgentJetJob that execute_agent reads."""
@@ -113,7 +119,7 @@ def rollout(self, task: Task) -> float:
         )
         out = _execute_aime_agent(task, api_baseurl_key, self.agent_config)
         self.swarm_worker.end_episode(task, episode_uuid, out)
-        return out.reward
+        return _scalar_reward(out.reward)
 
     def eval_rollout(self, task: Task) -> float:
         assert self.swarm_worker is not None
@@ -122,7 +128,7 @@ def eval_rollout(self, task: Task) -> float:
         )
         try:
             out = _execute_aime_agent(task, api_baseurl_key, self.agent_config)
-            return out.reward
+            return _scalar_reward(out.reward)
         finally:
             self.swarm_worker.abort_episode(episode_uuid)
 
diff --git a/tutorial/example_cocktail_rl_v2/train_appworld_as_swarm_client_0.py b/tutorial/example_cocktail_rl_v2/train_appworld_as_swarm_client_0.py
@@ -7,7 +7,6 @@
 
 from __future__ import annotations
 
-import os
 import random
 from typing import Iterator, List, Optional
 
diff --git a/tutorial/example_math_agent/math_agent.md b/tutorial/example_math_agent/math_agent.md
@@ -1,4 +1,4 @@
 # Training a basic math agent
 
 
-`ajet --conf tutorial/example_math_agent/math_agent.yaml`
+`ajet --conf tutorial/example_math_agent/math_agent.yaml`
diff --git a/tutorial/opencode_build_aime/agent_run_v3.py b/tutorial/opencode_build_aime/agent_run_v3.py
@@ -21,6 +21,7 @@
 import time
 from dataclasses import dataclass
 from textwrap import dedent
+from typing import Protocol
 from uuid import uuid4
 
 from openai import OpenAI
@@ -34,6 +35,11 @@
 TIMEOUT_EXIT_CODE = -101
 
 
+class AimeAgentConfigLike(Protocol):
+    model: str
+    max_response_length: int
+
+
 # ==================== Python Code Execution Tool ====================
 # Adapted from code-judge/app/libs/executors/executor.py
 
@@ -457,7 +463,7 @@ def compute_reward(solution_str: str, ground_truth: str) -> dict:
 def execute_agent(
     task: Task,
     api_baseurl_key: OpenaiBaseUrlAndApiKey,
-    ajet_job: AgentJetJob,
+    ajet_job: AimeAgentConfigLike,
 ) -> WorkflowOutput:
     base_url = api_baseurl_key.base_url
     api_key = api_baseurl_key.api_key
diff --git a/tutorial/opencode_build_aime/auto_research/auto_train.py b/tutorial/opencode_build_aime/auto_research/auto_train.py
@@ -7,7 +7,6 @@
 """
 
 import os
-import sys
 import argparse
 import time
 import statistics
@@ -28,6 +27,12 @@
 DEFAULT_PROJECT_NAME = "subject14_aime_baseline_group_8_bs32"
 
 
+def scalar_reward(reward: float | list[float] | None) -> float:
+    if not isinstance(reward, (int, float)):
+        raise TypeError(f"Expected scalar reward, got {type(reward).__name__}: {reward!r}")
+    return float(reward)
+
+
 def agentjet_job_kwargs_from_args(args: argparse.Namespace) -> dict:
     job_arg_names = set(signature(AgentJetJob.__init__).parameters) - {"self"}
     return {name: value for name, value in vars(args).items() if name in job_arg_names}
@@ -119,7 +124,7 @@ def eval_rollout(self, task: Task) -> float:
         )
         try:
             workflow_output = execute_agent(task, api_baseurl_key, self.ajet_job)
-            return workflow_output.reward
+            return scalar_reward(workflow_output.reward)
         finally:
             self.swarm_worker.abort_episode(episode_uuid)
 
@@ -264,7 +269,7 @@ def rollout(self, task: Task) -> float:
         episode_uuid, api_baseurl_key = self.swarm_worker.begin_episode(discard_episode_timeout=120)
         workflow_output = execute_agent(task, api_baseurl_key, self.ajet_job)
         self.swarm_worker.end_episode(task, episode_uuid, workflow_output)
-        return workflow_output.reward
+        return scalar_reward(workflow_output.reward)
 
     def train(self):
         assert self.swarm_worker is not None and self.dataset is not None, "setup() must be called before train()"