modelscope
diff --git a/‎ajet/__init__.py‎
Lines changed: 27 additions & 6 deletions b/‎ajet/__init__.py‎
Lines changed: 27 additions & 6 deletions
diff --git a/‎ajet/context_tracker/multiagent_tracking.py‎
Lines changed: 2 additions & 0 deletions b/‎ajet/context_tracker/multiagent_tracking.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ajet/copilot/job.py‎
Lines changed: 2 additions & 0 deletions b/‎ajet/copilot/job.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ajet/default_config/ajet_default.yaml‎
Lines changed: 1 addition & 1 deletion b/‎ajet/default_config/ajet_default.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ajet/default_config/ajet_ts_default.yaml‎
Lines changed: 5 additions & 2 deletions b/‎ajet/default_config/ajet_ts_default.yaml‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎ajet/task_rollout/native_parallel_worker.py‎
Lines changed: 34 additions & 49 deletions b/‎ajet/task_rollout/native_parallel_worker.py‎
Lines changed: 34 additions & 49 deletions
diff --git a/‎ajet/task_rollout/single_worker.py‎
Lines changed: 56 additions & 1 deletion b/‎ajet/task_rollout/single_worker.py‎
Lines changed: 56 additions & 1 deletion
diff --git a/‎ajet/task_runner/base_runner.py‎
Lines changed: 7 additions & 3 deletions b/‎ajet/task_runner/base_runner.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎ajet/task_runner/general_runner.py‎
Lines changed: 2 additions & 2 deletions b/‎ajet/task_runner/general_runner.py‎
Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,4 @@
-from ajet.copilot.job import AgentJetJob
-from ajet.schema.task import WorkflowOutput, WorkflowTask
-from ajet.tuner import AjetTuner
-from ajet.workflow import Workflow
-from ajet.utils.vsdb import vscode_conditional_breakpoint as bp
+__version__ = "0.1.0"
 
 __all__ = [
     "Workflow",
@@ -13,4 +9,29 @@
     "bp"
 ]
 
-__version__ = "0.1.0"
+_LAZY_IMPORTS = {
+    "AjetTuner": "ajet.tuner",
+    "AgentJetJob": "ajet.copilot.job",
+    "WorkflowOutput": "ajet.schema.task",
+    "WorkflowTask": "ajet.schema.task",
+    "Workflow": "ajet.workflow",
+    "bp": "ajet.utils.vsdb",
+}
+
+_ATTR_MAPPING = {
+    "bp": "vscode_conditional_breakpoint"
+}
+
+def __getattr__(name):
+    """Resolve a lazily exported package attribute on first access.
+
+    Names listed in ``_LAZY_IMPORTS`` are imported from their mapped module
+    only when requested. ``_ATTR_MAPPING`` supplies the attribute name to
+    read from that module when it differs from the exported name (for
+    example ``bp`` is read as ``vscode_conditional_breakpoint``).
+
+    Args:
+        name: The attribute name being looked up on this module.
+
+    Returns:
+        The object fetched from the mapped module.
+
+    Raises:
+        AttributeError: If ``name`` is not a key of ``_LAZY_IMPORTS``.
+    """
+    if name in _LAZY_IMPORTS:
+        # Imported here rather than at module top, so importlib is only
+        # loaded when a lazy name is actually requested.
+        import importlib
+        module_path = _LAZY_IMPORTS[name]
+        module = importlib.import_module(module_path)
+
+        # Fall back to the exported name when no alias is registered.
+        attr_name = _ATTR_MAPPING.get(name, name)
+        value = getattr(module, attr_name)  # type: ignore
+
+        # Store the resolved object in the module namespace so later
+        # accesses find it there directly.
+        globals()[name] = value
+        return value
+
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
@@ -49,12 +49,14 @@ def __init__(
         tokenizer: PreTrainedTokenizer,
         config,
         should_interrupt_fn,
+        should_interrupt_hard_fn,
         generated_token_callback_fn,
         **kwargs,
     ):
         super().__init__(config, tokenizer, **kwargs)
         self.tokenizer = tokenizer
         self.should_interrupt_fn = should_interrupt_fn
+        self.should_interrupt_hard_fn = should_interrupt_hard_fn
         self.generated_token_callback_fn = generated_token_callback_fn
         self.context_overflow = False
         self.output_kwargs = {}
 
@@ -44,6 +44,7 @@ def __init__(
         algorithm: str = "grpo",
         n_gpu_for_infer: int | None = None, # only for trinity backbone
         grpo_n: int = 8,
+        batch_size: int = 32,
         tinkerscript_mode: bool = True,
         *kwargs,
     ) -> None:
@@ -60,6 +61,7 @@ def __init__(
         self.config.ajet.trainer_common.n_gpus_per_node = n_gpu
         self.config.ajet.trainer_common.algorithm.adv_estimator = algorithm
         self.config.ajet.rollout.num_repeat = grpo_n
+        self.config.ajet.data.train_batch_size = batch_size
         if n_gpu_for_infer is None and backbone == "trinity":
             raise ValueError("Please specify `n_gpu_for_infer` (n_gpu_for_infer < n_gpu) for trinity backbone.")
         if (n_gpu_for_infer is not None) and backbone == "verl":
 
@@ -290,7 +290,7 @@ ajet:
     interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or  'ipc' (1 node)
     interchange_server_port: 'auto'
     num_fastapi_process: 2  # 1, 2 or 4 is fine
-    max_fastapi_threads: 128  # 64 or 128 is fine
+    max_fastapi_threads: 512  # default is now 512 (previously 128)
     max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker`
     already_started: False # do not edit, used by `tinkerscript`
 
 
@@ -7,7 +7,7 @@ ajet:
 
   model:
     # which model should be trained
-    path: /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-14B-Instruct
+    path: /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen/Qwen2___5-3B-Instruct
 
   rollout:
     # the path to the workflow class
@@ -29,10 +29,13 @@ ajet:
     interchange_method: 'ipc' # options: 'tcp' (multi-nodes) or  'ipc' (1 node)
     interchange_server_port: 10086
     num_fastapi_process: 2  # 1, 2 or 4 is fine
-    max_fastapi_threads: 128  # 64 or 128 is fine
+    max_fastapi_threads: 512  # default is now 512 (previously 128)
     max_inference_tracker_threads: 64 # recommend to be equal to `ajet.rollout.max_env_worker`
     already_started: False # do not edit, used by `tinkerscript`
 
+  rollout:
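+    # maximum number of parallel environment / simulation workers
+    # NOTE(review): this file already has a `rollout:` block under `ajet:` (next to `model:`); confirm a repeated `rollout:` key is handled as intended, or move `max_env_worker` into that block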
+    max_env_worker: 128
 
 
 # ------------------ 不需要修改 ------------------
 
@@ -8,17 +8,19 @@
 
 import numpy as np
 import torch
+import threading
 from loguru import logger
 from tensordict import TensorDict
 from torch.nn.utils.rnn import pad_sequence
 from tqdm import tqdm
 from verl import DataProto
 from verl.utils.torch_functional import pad_sequence_to_length
 
-from ajet.context_tracker.basic_tracker import BaseContextTracker
 from ajet.schema.task import Task
 from ajet.schema.trajectory import Sample
 from ajet.task_rollout.single_worker import BaseRolloutManager
+from ajet.context_tracker.basic_tracker import BaseContextTracker
+from ajet.tuner_lib.weight_tuner.experimental.interchange_utils import http_change_engine_status
 
 
 class DynamicRolloutManager(BaseRolloutManager):
@@ -481,33 +483,39 @@ def rollout_swarm(  # noqa: C901
         tracker_array: List[BaseContextTracker] = []
         assert mode != "validate"
         rollout_n = self.rollout_n
-        n_task = len(tasks)
+        n_batch_task = len(tasks)
+        n_task = min(len(tasks), self.max_parallel // rollout_n)
+        assert n_task > 0, f"n_task is not valid, n_task = min(len(tasks), self.max_parallel // rollout_n) = {n_task}"
         self.current_token_count_time = time.time()
 
         # initialize observation window
         observation_window: Dict[str, List[int | bool | str]] = {
             "info": ["" for _ in range(n_task * rollout_n)],
             "step": [0 for _ in range(n_task * rollout_n)],
             "stop": [False for _ in range(n_task * rollout_n)],
+            "hard_stop": [False for _ in range(n_task * rollout_n)],
             "token": [0 for _ in range(n_task * rollout_n)],
         }
         executor = ThreadPoolExecutor(max_workers=self.max_parallel)
         futures: List[Future] = []
         completed_task_id_map_ct: Dict[str, List[BaseContextTracker]] = {}
+        executor_lock = threading.Lock()
 
         # submit initial tasks
         dummy_task = Task(main_query="dummy task")
         for task_batch_index in range(n_task):
             for task_rollout_index in range(rollout_n):
                 task_thread_index = task_batch_index * rollout_n + task_rollout_index
                 future = executor.submit(
-                    self.rollout_env_worker,
+                    self.rollout_env_worker_loop,
                     task=dummy_task,
                     task_tag="",
                     mode=mode,
                     task_batch_index=task_batch_index,
                     task_thread_index=task_thread_index,
                     observation_window=observation_window,
+                    completed_task_id_map_ct=completed_task_id_map_ct,
+                    executor_lock=executor_lock,
                 )
                 observation_window["info"][task_thread_index] = "1"
                 futures.append(future)
@@ -516,14 +524,15 @@ def enough_sample_stop_condition(completed_task_id_map_ct) -> bool:
             n = 0
             for ct_list in completed_task_id_map_ct.values():
                 n += len(ct_list)
-            return (n >= n_task * rollout_n)
+            print(f"Current collected samples: {n}, target: {n_batch_task * rollout_n}")
+            return (n >= n_batch_task * rollout_n)
 
         def enough_finished_task_stop_condition(completed_task_id_map_ct) -> bool:
             n_finish_roll_task = 0
             for ct_list in completed_task_id_map_ct.values():
                 if len(ct_list) >= rollout_n:
                     n_finish_roll_task += 1
-            return (n_finish_roll_task >= n_task)
+            return (n_finish_roll_task >= n_batch_task)
 
         def enough_non_dummy_task_stop_condition(completed_task_id_map_ct) -> bool:
             n_finish_roll_task = 0
@@ -535,63 +544,39 @@ def enough_non_dummy_task_stop_condition(completed_task_id_map_ct) -> bool:
                     all_equal = all(x == task_cmd_reward_array[0] for x in task_cmd_reward_array)
                     if all_equal: continue
                     n_finish_roll_task += 1
-            return (n_finish_roll_task >= n_task)
+            return (n_finish_roll_task >= n_batch_task)
 
         stop_condition = enough_sample_stop_condition
 
-        def force_stop_all_threads():
-            for k in range(len(observation_window["stop"])):
-                observation_window["stop"][k] = True
+        def stop_all_threads_soft():
+            for k in range(len(observation_window["stop"])): observation_window["stop"][k] = True
+            http_change_engine_status(self.config, "ENGINE.ROLLING_POST")
+            return
+
+        def stop_all_threads_hard():
+            for k in range(len(observation_window["hard_stop"])): observation_window["hard_stop"][k] = True
+            http_change_engine_status(self.config, "ENGINE.WEIGHT_SYNCING")
             return
 
-        tic = time.time()
+        cnt = 0
         while True:
-            # wait for a completed task
-            done_arr, pending_arr = wait(futures, timeout=10, return_when=FIRST_COMPLETED)
-            print(f"Done tasks: {len(done_arr)}, Pending tasks: {len(pending_arr)}")
-            toc = time.time()
-            if (toc - tic) > 8:
-                tic = toc
+            cnt += 1
+            time.sleep(2)
+            if (cnt % 5 == 0):
                 self.step_status_printer(observation_window)
-            # get result
-            for future in done_arr:
-                ct: BaseContextTracker = future.result()
-                if ct.task_id not in completed_task_id_map_ct:
-                    completed_task_id_map_ct[ct.task_id] = [ct]
-                else:
-                    completed_task_id_map_ct[ct.task_id] += [ct]
-            # if meet stop condition
             meet_stop_condition_after_new_results = stop_condition(completed_task_id_map_ct)
             if meet_stop_condition_after_new_results:
-                force_stop_all_threads()
+                print("Sending soft stop signal to all threads...")
+                stop_all_threads_soft()
                 break
-            else:
-                # re-spawn new tasks for done futures
-                for task_batch_index in range(n_task):
-                    for task_rollout_index in range(rollout_n):
-                        task_thread_index = task_batch_index * rollout_n + task_rollout_index
-                        has_done = (futures[task_thread_index] in done_arr)
-
-                        observation_window["info"][task_thread_index] = str(int(observation_window["info"][task_thread_index]) + 1)
-                        observation_window["stop"][task_thread_index] = False
-                        observation_window["step"][task_thread_index] = 0
-
-                        if has_done:
-                            print(f"Re-spawning thread {task_thread_index}...")
-                            future = executor.submit(
-                                self.rollout_env_worker,
-                                task=dummy_task,
-                                task_tag="",
-                                mode=mode,
-                                task_batch_index=task_batch_index,
-                                task_thread_index=task_thread_index,
-                                observation_window=observation_window,
-                            )
-                            futures[task_thread_index] = future
 
         # wait for all threads to complete
         print('Finalizing all threads...')
-        wait(futures, return_when=ALL_COMPLETED)
+        executor.shutdown(wait=True)
+
+        # stop all threads hard
+        print("Sending hard stop signal to all threads...")
+        stop_all_threads_hard()
 
         # build tracker_array
         print('Collecting results...')
 
@@ -1,10 +1,13 @@
 """Single worker primitives for environment rollouts."""
 
 import uuid
+import time
+import threading
 from typing import Literal
 
 from loguru import logger
 from omegaconf import DictConfig
+from typing import Dict, List, Literal
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 from ajet.context_tracker.basic_tracker import BaseContextTracker
@@ -14,9 +17,9 @@
 from ajet.task_runner.general_runner import GeneralRunner
 from ajet.task_runner.tinkerscript_runner import TinkerScriptRunner
 from ajet.utils.retry import retry_with_backoff
+from ajet.utils.retry import SwarmReceiveAbortException
 from ajet.utils.sample import get_sample_params
 from ajet.utils.testing_utils import TestFailException, TestSuccessException
-from ajet.task_runner.tinkerscript_runner import SwarmReceiveAbortException
 
 
 class BaseRolloutManager:
@@ -125,6 +128,7 @@ def rollout_env_worker(
                     workflow_task=workflow_task,
                 )
             except SwarmReceiveAbortException as exc:  # noqa: BLE001
+                print('SwarmReceiveAbortException caught in rollout_env_worker')
                 return None # type: ignore
             except TestSuccessException as e:
                 logger.success(
@@ -141,3 +145,54 @@ def rollout_env_worker(
                 raise e
 
         return tracker
+
+
+    def rollout_env_worker_loop(
+        self,
+        task: Task,
+        task_batch_index: int,
+        task_tag: str,
+        mode: Literal["sample", "validate"],
+        task_thread_index: int,
+        observation_window: dict,
+        completed_task_id_map_ct: Dict[str, List[BaseContextTracker]],
+        executor_lock: threading.Lock,
+        **kwargs,
+    ):
+        """Run ``rollout_env_worker`` repeatedly in this thread until told to stop.
+
+        Each iteration writes the 1-based iteration count (as a string) to
+        ``observation_window["info"][task_thread_index]`` and performs one
+        rollout. If the returned tracker is truthy and has a truthy
+        ``reward_structure``, it is appended to
+        ``completed_task_id_map_ct[tracker.task_id]`` while holding
+        ``executor_lock``.
+
+        The loop returns ``None`` as soon as
+        ``observation_window["stop"][task_thread_index]`` is truthy; the flag
+        is checked both before and after every rollout.
+
+        Args:
+            task: Forwarded unchanged to ``rollout_env_worker``.
+            task_batch_index: Forwarded unchanged to ``rollout_env_worker``.
+            task_tag: Forwarded unchanged to ``rollout_env_worker``.
+            mode: Forwarded unchanged to ``rollout_env_worker``.
+            task_thread_index: Slot of this worker inside the per-thread
+                lists of ``observation_window``; also forwarded to
+                ``rollout_env_worker``.
+            observation_window: Dict of per-thread lists; this method reads
+                ``"stop"`` and writes ``"info"``.
+            completed_task_id_map_ct: Mapping from task id to the trackers
+                collected so far; mutated in place.
+            executor_lock: Lock held while mutating
+                ``completed_task_id_map_ct``.
+            **kwargs: Extra keyword arguments forwarded to
+                ``rollout_env_worker``.
+
+        Raises:
+            Exception: Any exception raised inside the loop is logged with
+                ``logger.exception`` and re-raised.
+        """
+        try:
+            cnt = 1
+            while True:
+
+                # Check the stop flag before starting a new rollout.
+                if observation_window["stop"][task_thread_index]:
+                    print('rollout_env_worker_loop received stop signal, exiting...')
+                    return
+
+                # Publish the current iteration number for this thread.
+                observation_window["info"][task_thread_index] = str(cnt)
+                tracker = self.rollout_env_worker(
+                    task=task,
+                    task_batch_index=task_batch_index,
+                    task_tag=task_tag,
+                    mode=mode,
+                    task_thread_index=task_thread_index,
+                    observation_window=observation_window,
+                    **kwargs,
+                )
+
+                # avoid write conflict
+                # `rollout_env_worker` returns None when it catches
+                # SwarmReceiveAbortException, hence the truthiness guard.
+                if tracker and tracker.reward_structure:
+                    with executor_lock:
+                        if tracker.task_id not in completed_task_id_map_ct:
+                            completed_task_id_map_ct[tracker.task_id] = [tracker]
+                        else:
+                            completed_task_id_map_ct[tracker.task_id] += [tracker]
+                cnt += 1
+                # Re-check the stop flag after the rollout before looping again.
+                if observation_window["stop"][task_thread_index]:
+                    return
+                else:
+                    # Drop the local reference before the next iteration.
+                    del tracker
+
+        except Exception as e:
+            logger.exception(
+                f"encounter exception in env_worker_loop error={e.args}"
+            )
+            raise e
@@ -49,9 +49,12 @@ def get_judge(self) -> BaseJudge:  # type: ignore
 
     def runner_hooks(self, observation_window, task_thread_index, workflow_task):
         def should_interrupt_fn() -> bool:
-            if (observation_window["stop"] is not None) and observation_window["stop"][
-                task_thread_index
-            ]:  # Check if the thread should stop (because other threads have completed, making this thread useless)
+            if (observation_window["stop"] is not None) and observation_window["stop"][task_thread_index]:  # Check if the thread should stop (because other threads have completed, making this thread useless)
+                return True
+            return False
+
+        def should_interrupt_hard_fn() -> bool:
+            if (observation_window["hard_stop"] is not None) and observation_window["hard_stop"][task_thread_index]:  # Check if the thread should stop (because other threads have completed, making this thread useless)
                 return True
             return False
 
@@ -60,6 +63,7 @@ def generated_token_callback_fn(token_array):
 
         return {
             "should_interrupt_fn": should_interrupt_fn,
+            "should_interrupt_hard_fn": should_interrupt_hard_fn,
             "generated_token_callback_fn": generated_token_callback_fn,
         }
 
 
@@ -1,6 +1,6 @@
 
-from ajet import AjetTuner
-from ajet import WorkflowOutput
+from ajet.tuner import AjetTuner
+from ajet.schema.task import WorkflowOutput, WorkflowTask
 from ajet.context_tracker.multiagent_tracking import (
     MultiAgentContextTracker,
 )
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`
`2`		`-from ajet import AjetTuner`
`3`		`-from ajet import WorkflowOutput`
	`2`	`+from ajet.tuner import AjetTuner`
	`3`	`+from ajet.schema.task import WorkflowOutput, WorkflowTask`
`4`	`4`	`from ajet.context_tracker.multiagent_tracking import (`
`5`	`5`	`MultiAgentContextTracker,`
`6`	`6`	`)`