Refactor MultiAgentContextTracker and ExtendedMessage for improved message handling; update oai_model_client to use asyncio for ZMQ communication; adjust math_agent.yaml model path; modify agent_roll_v3.py for batch size and experiment naming.

binary-husky · binary-husky · commit 0ab6d6b95c4a · 2026-04-22T18:27:15.000+08:00
diff --git a/ajet/context_tracker/multiagent_tracking.py b/ajet/context_tracker/multiagent_tracking.py
@@ -166,6 +166,7 @@ def step_spawn_timeline(self, messages: List[dict], tools: List = [], disable_to
             else:
                 author = "env"
 
+            # extract content block from openai-compatible messages and convert to ExtendedMessage
             timeline += [
                 ExtendedMessage(
                     author=author,
@@ -235,6 +236,7 @@ def step_track(
 
         tool_calls = self.detect_tool_call_madness(llm_output)
 
+        # add llm_output to timeline and save
         llm_ext_msg = ExtendedMessage(
             author="llm",
             role="assistant",
diff --git a/ajet/schema/extended_msg.py b/ajet/schema/extended_msg.py
@@ -101,7 +101,7 @@ def __init__(
         self.manual_loss_mask_override = []
         self.lack_normal_eos = False
 
-        self.generate_content_for_compare(tokenizer=None)
+        self.generate_content_for_compare(content = self.content)
 
         self.eos_token_id = tokenizer.eos_token_id
 
@@ -173,7 +173,7 @@ def content_for_compare(self):
         if self._content_for_compare == "":
             if not self.tool_calls:
                 logger.exception("content_for_compare is not set, or previous llm output is empty!")
-                self._content_for_compare
+                # self._content_for_compare
         return self._content_for_compare
 
     @property
@@ -185,9 +185,8 @@ def need_training(self):
         ), f"author {self.author} is not identified"
         return self.author in NEED_TRAIN_AUTHORS
 
-    def generate_content_for_compare(self, tokenizer):
-        _content: str = self.content
-        self._content_for_compare = _content
+    def generate_content_for_compare(self, content):
+        self._content_for_compare = content
 
     def get_loss_mask(self, blackout_token_combo):
         if self.need_training:
@@ -302,6 +301,7 @@ def merge_tool_group(group, tokenizer):
             )
             merged_content = merged_content[len("<tool_response>\n") :]
             merged_content = merged_content[: -len("</tool_response>\n")]
+            # create merged tool response block
             merged = ExtendedMessage(
                 author=msg0.author,
                 role=msg0.role,
diff --git a/ajet/tuner_lib/experimental/oai_model_client.py b/ajet/tuner_lib/experimental/oai_model_client.py
@@ -5,19 +5,19 @@
 import os
 import time
 import zmq
-import json
+import zmq.asyncio
 
 from loguru import logger
 from typing import TYPE_CHECKING
 from ajet.tuner_lib.experimental.oai_model_server import InterchangeCompletionRequest
-from ajet.utils.thread_executors import SharedInferenceTrackerThreadExecutor, SharedInterchangeThreadExecutor
+from ajet.utils.thread_executors import SharedInterchangeThreadExecutor
 from ajet.tuner_lib.experimental.interchange_utils import get_zmq_socket
 from ajet.tuner_lib.experimental.interchange_utils import DEBUG
 
 if TYPE_CHECKING:
     pass
 
-context = zmq.Context()
+context = zmq.asyncio.Context()
 atexit.register(context.term)
 
 if TYPE_CHECKING:
@@ -72,11 +72,10 @@ def begin_service(self):
         if DEBUG: logger.info(f"[client] {self.episode_uuid} | Starting InterchangeClient service loop...")
         self.socket = context.socket(zmq.REP)
         self.socket.bind(f"{self.episode_contect_address}")
-        self.socket.setsockopt(zmq.RCVTIMEO, 1*1000)  # 1 second timeout for REP
 
         self.executor = SharedInterchangeThreadExecutor(self.max_inference_tracker_threads).get_shared_executor()
-        if DEBUG: logger.info(f"[client] {self.episode_uuid} | Submitting _begin_service_threading to executor...")
-        future = self.executor.submit(self._begin_service_threading)
+        if DEBUG: logger.info(f"[client] {self.episode_uuid} | Submitting _run_service_loop to executor...")
+        future = self.executor.submit(self._run_service_loop)
 
         # wait till service begin running
         wait_time = 1
@@ -94,26 +93,33 @@ def begin_service(self):
         return self.episode_contect_address
 
 
-    def _begin_service_threading(self):
-        """begin listening for service requests in a threading model
+    def _run_service_loop(self):
+        """Runs a dedicated asyncio event loop for this episode's zmq service.
+        """
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            loop.run_until_complete(self._begin_service_async())
+        finally:
+            loop.close()
+            asyncio.set_event_loop(None)
+
+
+    async def _begin_service_async(self):
+        """begin listening for service requests using zmq.asyncio
         """
 
         begin_time = time.time()
         ever_receive_anything = False
         if DEBUG: logger.info(f"[client] {self.episode_uuid} | Starting ZMQ socket bind complete")
 
+        poller = zmq.asyncio.Poller()
+        poller.register(self.socket, zmq.POLLIN)
+
         try:
             while not self.should_hard_terminate:
-                try:
-
-                    # <wait for>:
-                    #   <from_sourcefile>: ajet/tuner_lib/experimental/oai_model_server.py
-                    #   <from_code>: socket.send_string(int_req.model_dump_json())
-                    #   <expect>: InterchangeCompletionRequest object in JSON string format
-                    message = self.socket.recv_string()
-
-                    ever_receive_anything = True
-                except zmq.Again as e:
+                events = dict(await poller.poll(timeout=1000))  # poll timeout is in milliseconds (1 second)
+                if self.socket not in events:
                     if self.should_hard_terminate:
                         # abort_episode()
                         if DEBUG: logger.info(f"[client] {self.episode_uuid} | episode over")
@@ -123,51 +129,35 @@ def _begin_service_threading(self):
                         if DEBUG: logger.warning(f"[client] {self.episode_uuid} | Still waiting for first message... (time passed {timepassed}) for episode_uuid:{self.episode_uuid}...")
                     continue
 
+                # <wait for>:
+                #   <from_sourcefile>: ajet/tuner_lib/experimental/oai_model_server.py
+                #   <from_code>: socket.send_string(int_req.model_dump_json())
+                #   <expect>: InterchangeCompletionRequest object in JSON string format
+                message = await self.socket.recv_string()
+                ever_receive_anything = True
+
                 # parse the incoming request
                 if DEBUG: logger.info(f"[client] {self.episode_uuid} | before json.loads(message)")
                 data_as_json = json.loads(message)
                 parsed_msg = InterchangeCompletionRequest(**data_as_json)
 
-                # begin to run the llm request, monitored by context tracker
-                # we re-use previously created thread for best performance
-                if DEBUG: logger.info(f"[client] {self.episode_uuid} | before asyncio run self.llm_infer")
-
-                # Check if there's a running event loop
-                try:
-                    loop = asyncio.get_running_loop()
-                    created_new_loop = False
-                except RuntimeError:
-                    # No running loop, create a new one
-                    loop = asyncio.new_event_loop()
-                    asyncio.set_event_loop(loop)
-                    created_new_loop = True
-
-                try:
-                    context_tracker_executor = SharedInferenceTrackerThreadExecutor(self.max_inference_tracker_threads).get_shared_executor()
-                    future = loop.run_in_executor(
-                        context_tracker_executor,
-                        asyncio.run,
-                        self.llm_proxy_with_tracker.chat_completion_request(
-                            req=parsed_msg.completion_request,
-                            timeline_uuid=parsed_msg.timeline_uuid,
-                            agent_name=parsed_msg.agent_name,
-                            target_tag=parsed_msg.target_tag,
-                            episode_uuid=parsed_msg.episode_uuid,
-                        )
-                    )
-                    result = loop.run_until_complete(future).model_dump_json()  # type: ignore
-                finally:
-                    # Clean up the event loop if we created it
-                    if created_new_loop:
-                        loop.close()
-                        asyncio.set_event_loop(None)
+                # run the llm request, monitored by context tracker
+                if DEBUG: logger.info(f"[client] {self.episode_uuid} | before awaiting self.llm_infer")
+                response = await self.llm_proxy_with_tracker.chat_completion_request(
+                    req=parsed_msg.completion_request,
+                    timeline_uuid=parsed_msg.timeline_uuid,
+                    agent_name=parsed_msg.agent_name,
+                    target_tag=parsed_msg.target_tag,
+                    episode_uuid=parsed_msg.episode_uuid,
+                )
+                result = response.model_dump_json()
 
                 if DEBUG: logger.info(f"[client] {self.episode_uuid} | before send_string (send llm call result)")
 
                 # <send to>
                 #   <to_sourcefile>: ajet/tuner_lib/experimental/oai_model_server.py
                 #   <to_code>: result_str = socket.recv_string()
-                self.socket.send_string(result)
+                await self.socket.send_string(result)
 
                 if DEBUG: logger.info(f"[client] {self.episode_uuid} | after send_string (send llm call result)")
         except:
diff --git a/tutorial/example_math_agent/math_agent.yaml b/tutorial/example_math_agent/math_agent.yaml
@@ -15,13 +15,13 @@ ajet:
 
   model:
     # ✨✨✨✨ set the model to be trained
-    path: Qwen/Qwen2.5-7B
+    path: /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-7B-Instruct
 
   rollout:
-    user_workflow: "tutorial.example_math_agent.math_agent->ExampleMathLearn" # ✨✨✨✨ write and select workflow
+    # user_workflow: "tutorial.example_math_agent.math_agent->ExampleMathLearn" # ✨✨✨✨ write and select workflow
     # user_workflow: "tutorial.example_math_agent.math_agent_langchain->ExampleMathLearn"                     # ✨if you prefer langchain version
     # user_workflow: "tutorial/example_math_agent/math_agent_oai_sdk.py->ExampleMathLearn_Simple_NoToolCall"  # ✨if you prefer openai sdk version without toolcall
-    # user_workflow: "tutorial/example_math_agent/math_agent_oai_sdk.py->ExampleMathLearn"                    # ✨if you prefer openai sdk version with toolcall
+    user_workflow: "tutorial/example_math_agent/math_agent_oai_sdk.py->ExampleMathLearn"                    # ✨if you prefer openai sdk version with toolcall
     # user_workflow: "tutorial/example_math_agent/math_agent_raw_http.py->ExampleMathLearn"                   # ✨if you do not want to use any agentic framwork at all
     # user_workflow: "tutorial/example_math_agent/math_agent_simplify.py->MathToolWorkflow"                   # ✨if you prefer to compute reward inside workflow
     temperature: 1.0
diff --git a/tutorial/opencode_build_aime/agent_roll_v3.py b/tutorial/opencode_build_aime/agent_roll_v3.py
@@ -25,16 +25,17 @@
 REMOTE_MODEL_PATH = os.getenv("REMOTE_MODEL_PATH", "/mnt/data_cpfs/xielipeng.xlp/models/Qwen3-14B")
 BATCH_SIZE = 32
 NUM_REPEAT = 8
-MINI_BATCH_NUM = 2
+MINI_BATCH_NUM = 1
 ajet_job = AgentJetJob(
     algorithm="grpo",
-    experiment_name="aime_swarm_14b_v33_2",
+    experiment_name="aime_swarm_14b_v33_ppoepoch4",
     max_env_worker=128,
     n_gpu=8,
     model=REMOTE_MODEL_PATH,
     batch_size=BATCH_SIZE,
     swarm_mode_sample_collection_method="rollout_until_finish_enough_non_dummy_tasks",
     num_repeat=NUM_REPEAT,
+    ppo_epochs=4,
     mini_batch_num=MINI_BATCH_NUM,
     logging="swanlab",
     max_prompt_length=3000,