---
name: train-complex-blackbox
description: Create a trainable agent loop or agent workflow with AgentJet
license: Complete terms in LICENSE.txt
---

## 0. Ask the user for an API key + model (or API key + base URL + model) for debugging

This is not strictly necessary, but it helps a lot with debugging in step 1.
If the user has not provided an API key, ask them to provide one.
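
A minimal helper for collecting these values might look like the following sketch. The environment-variable names, the default base URL, and the fallback model name are all illustrative assumptions, not a fixed convention:

```python
def resolve_debug_credentials(env: dict) -> dict:
    # Hypothetical helper: gather api key / base url / model for step-1 debugging.
    # The key names below ("API_KEY", "BASE_URL", "MODEL") are assumptions.
    if not env.get("API_KEY"):
        raise ValueError("ask the user for an API key before continuing")
    return {
        "api_key": env["API_KEY"],
        "base_url": env.get("BASE_URL", "https://api.openai.com/v1"),
        "model": env.get("MODEL", "debug-model"),
    }
```

Raising early when the key is missing makes it obvious that the user must be asked before any debugging starts.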


By default, the code you write should be located at `./tutorial/opencode_build_xxxxxx/*.py`.

## 1. Initial Programming

### Writing the dataset collector (`get_training_dataset_item_list.py`)
- `get_training_dataset_item_list.py`: Returns a list of training data items. Each item may be a string identifier of a training task, or a dict containing the information needed to run that task.
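
As a sketch, assuming the training tasks live in a JSONL file with one record per task (adapt the loading logic to wherever the user's tasks actually come from; the `task_id` key is an illustrative choice):

```python
import json
from pathlib import Path

def get_training_dataset_item_list(dataset_path: str) -> list:
    # Return one dict per training task: the original JSONL record plus a
    # synthetic identifier that downstream scripts can use to reference it.
    items = []
    for idx, line in enumerate(Path(dataset_path).read_text(encoding="utf-8").splitlines()):
        if not line.strip():
            continue  # skip blank lines
        record = json.loads(line)
        items.append({"task_id": f"task-{idx}", **record})
    return items
```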

### Episode Runner (`run_episode_once.py`)
- `run_episode_once.py`:

  - Argument parser: takes (training data item identifier + api-key + base-url) as input. A model-name argument is not required; you can make up a model name because it is ignored.

  - Execute the agent: read the documentation of the agent the user asked you to train and figure out how to execute it. In most cases you can use subprocess to start a command-line process that runs the agent; the biggest issue is figuring out how to pass the training data item identifier, api-key, and base-url to that process. You can also execute the agent from Python code if that is more convenient.

  - Reward: extract or compute the reward/score for the agent's output. Some agents have a clear reward signal, but others do not.
    - Clear reward signal: record it as the reward; no extra reward engineering is needed.
    - No clear reward signal: design a reward function that scores the agent's output. You can use another LLM to help design it, or design it yourself if you have domain knowledge.
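
A skeleton for these three parts might look like the following. The agent command (`my-agent`), its flag names, and the `FINAL ANSWER:` reward convention are all placeholder assumptions; replace them with whatever the target agent actually uses:

```python
import argparse
import re
import subprocess

def build_parser() -> argparse.ArgumentParser:
    # Takes (task identifier + api-key + base-url); no --model-name flag is
    # needed because the model name is ignored anyway.
    parser = argparse.ArgumentParser(description="Run one training episode.")
    parser.add_argument("--task-id", required=True)
    parser.add_argument("--api-key", required=True)
    parser.add_argument("--base-url", required=True)
    return parser

def extract_reward(agent_output: str) -> float:
    # Placeholder reward: 1.0 if the agent printed "FINAL ANSWER: <x>", else 0.0.
    # If the agent exposes a clear reward signal, parse that instead.
    return 1.0 if re.search(r"FINAL ANSWER:\s*\S+", agent_output) else 0.0

def run_episode_once(task_id: str, api_key: str, base_url: str) -> float:
    # "my-agent" and its flags are hypothetical; check the agent's own docs
    # for how to pass the task, api-key, and base-url on its command line.
    proc = subprocess.run(
        ["my-agent", "--task", task_id, "--api-key", api_key, "--base-url", base_url],
        capture_output=True, text=True, timeout=600,
    )
    return extract_reward(proc.stdout)
```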


### Test

Test both scripts before moving to step 2 and make sure they work as expected.
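
A pair of tiny sanity checks like these (they check shapes only; the names are illustrative) is usually enough before moving on:

```python
def check_dataset_items(items) -> None:
    # The collector must return a non-empty list of task ids or task dicts.
    assert isinstance(items, list) and items, "expected a non-empty list"
    for item in items:
        assert isinstance(item, (str, dict)), "items must be strings or dicts"

def check_reward(reward) -> None:
    # The episode runner must produce a single numeric score.
    assert isinstance(reward, (int, float)), "reward must be numeric"
```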



## 2. Writing training code

This part is easy: simply follow this template and change the necessary parts, such as the dataset path and model name.

`agent_roll.py`

```python
# -*- coding: utf-8 -*-

import os
import re
import requests
from textwrap import dedent
from ajet.schema.task import Task, WorkflowOutput
from ajet.copilot.job import AgentJetJob
from ajet.task_reader import RouterTaskReader
from ajet.utils.thread_executors import PeriodicDrainThreadPoolExecutor
from ajet.tuner_lib.as_oai_baseurl_apikey import OpenaiBaseUrlAndApiKey
from ajet.default_config.ajet_default import AjetTaskReader, HuggingfaceDatRepo
from ajet.tuner_lib.experimental.as_swarm_client import SwarmClient

# python -m tutorial.example_math_swarm.math

GRPO_N = 4  # GRPO group size
NUM_EPOCH = 10000
AJET_SWARM_URL = os.getenv("AJET_SWARM_URL", "http://localhost:10086")
REMOTE_MODEL_PATH = os.getenv("REMOTE_MODEL_PATH", "/mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2.5-7B-Instruct")
REMOTE_BATCH_SIZE = 32
REMOTE_ALLOCATE_GPU_PER_NODE = 8


def main():
    # Handshake with the swarm remote, then send the training params
    # (model to be trained, algorithm, etc.)
    dataset = RouterTaskReader(
        reader_type="huggingface_dat_repo",
        reader_config=AjetTaskReader(
            huggingface_dat_repo=HuggingfaceDatRepo(
                dataset_path='/mnt/data_cpfs/model_cache/modelscope/dataset/openai/gsm8k/main',
                # dataset_path = "/root/agentjet/benchmark_datasets/dataset/gsm8k/socratic",
                # dataset_path = "openai/gsm8k",
                # dataset_name = "main",
            )
        )
    )
    # Alternatively, load tasks from a local JSONL file:
    # print(f"Loading dataset from: {LOCAL_DATASET_PATH}")
    # dataset = RouterTaskReader(
    #     reader_type="jsonl_dataset_file",
    #     reader_config=AjetTaskReader(
    #         jsonl_dataset_file=JsonlDatasetFile(
    #             training=JsonlTrainingFp(file_path=LOCAL_DATASET_PATH)
    #         )
    #     ),
    # )

    # Handshake with the remote swarm server
    swarm_worker = SwarmClient(AJET_SWARM_URL)
    ajet_job = AgentJetJob(
        experiment_name="math_gsm8k_grpo",
        algorithm="grpo",
        n_gpu=REMOTE_ALLOCATE_GPU_PER_NODE,
        model=REMOTE_MODEL_PATH,
        batch_size=REMOTE_BATCH_SIZE,
        num_repeat=GRPO_N,
    )
    print(ajet_job.config.to_dict())
    swarm_worker.auto_sync_train_config_and_start_engine(
        ajet_job,
        force_restart=True,
    )

    def rollout(task):
        # Begin the episode
        episode_uuid, api_baseurl_key = swarm_worker.begin_episode(discard_episode_timeout=60)
        # Execute the agent (base_url=api_baseurl_key.base_url, api_key=api_baseurl_key.api_key)
        workflow_output = execute_agent(task, api_baseurl_key)  # the reward is in `workflow_output`
        # Report the output back to the swarm remote
        swarm_worker.end_episode(task, episode_uuid, workflow_output)
        return

    executor = PeriodicDrainThreadPoolExecutor(workers=GRPO_N * REMOTE_BATCH_SIZE, auto_retry=True)
    for _ in range(NUM_EPOCH):
        for task in dataset.generate_training_tasks():
            for _ in range(GRPO_N):
                executor.submit_with_periodic_drain(fn=rollout, task=task)

    return None


def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey):
    ...
    raw_reward: float = ...  # compute the reward for the agent's output
    return WorkflowOutput(reward=raw_reward, metadata={"important_metadata": important_metadata})


if __name__ == "__main__":
    main()
```


Your job in step 2 is therefore to:

- use `get_training_dataset_item_list.py` to generate a `List[Task]` (`from ajet.schema.task import Task`)
- use `run_episode_once.py` to execute a single episode, and place that logic in the `execute_agent` function
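
One way to keep `execute_agent` thin is to inject the two pieces you already tested in step 1. Here is a self-contained sketch; the dataclass is only a stand-in for `ajet.schema.task.WorkflowOutput` so the snippet runs on its own, and the extra `run_fn`/`score_fn` parameters are an illustrative refactor, not the template's required signature:

```python
from dataclasses import dataclass, field

@dataclass
class WorkflowOutput:
    # Stand-in: in agent_roll.py, import the real class from ajet instead.
    reward: float
    metadata: dict = field(default_factory=dict)

def execute_agent(task, api_baseurl_key, run_fn, score_fn):
    # run_fn: the episode launcher from run_episode_once.py
    # score_fn: the reward function from run_episode_once.py
    transcript = run_fn(task, api_baseurl_key)
    reward = score_fn(transcript)
    return WorkflowOutput(reward=reward, metadata={"transcript_len": len(transcript)})
```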


## 3. Simplify your code and fix bugs

Before moving to step 4, simplify your code and fix any bugs to make sure it runs smoothly.


## 4. Training

Finally, you can start training.

Run `ajet-swarm start` to start the training server (if the user has already installed the agentjet swarm environment).
If the user has a docker environment, you can also refer to `docs/en/ajet-swarm-docker.md` to start an AgentSwarm docker container.

Create a copy of `agent_roll.py` named `agent_roll_one_episode_debug.py` and modify it to run only one episode; this helps you verify that the episode runner and reward function work as expected.
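
The only structural change needed in the debug copy is replacing the nested training loops with a single rollout. As a generic sketch (a hypothetical helper, shown standalone so the pattern is clear):

```python
def run_one_episode(task_iterable, rollout_fn):
    # In agent_roll_one_episode_debug.py, use this in place of the
    # NUM_EPOCH x dataset x GRPO_N loops: pull one task, roll it out once.
    first_task = next(iter(task_iterable))
    return rollout_fn(first_task)
```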

After the server side is ready, run
```bash
python /path/to/agent_roll_one_episode_debug.py
```
and watch the console log to see whether the episode executes successfully and the reward is computed correctly.

If anything goes wrong, keep the server running, fix `agent_roll_one_episode_debug.py`, and run it again until it can complete one episode successfully.

Next, patch `agent_roll.py` with any fixes discovered while debugging `agent_roll_one_episode_debug.py`, and then run
```bash
python /path/to/agent_roll.py
```

to start the training!