modelscope
diff --git a/‎ajet/backbone/main_trinity.py‎
Lines changed: 2 additions & 2 deletions b/‎ajet/backbone/main_trinity.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ajet/backbone/main_verl.py‎
Lines changed: 2 additions & 2 deletions b/‎ajet/backbone/main_verl.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ajet/backbone/main_vllm.py‎
Lines changed: 2 additions & 2 deletions b/‎ajet/backbone/main_vllm.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ajet/backbone/trainer_trinity.py‎
Lines changed: 1 addition & 1 deletion b/‎ajet/backbone/trainer_trinity.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ajet/backbone/trainer_verl.py‎
Lines changed: 1 addition & 1 deletion b/‎ajet/backbone/trainer_verl.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ajet/backbone/warm_up.py‎
Lines changed: 8 additions & 0 deletions b/‎ajet/backbone/warm_up.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎ajet/default_config/ajet_default.yaml‎
Lines changed: 22 additions & 14 deletions b/‎ajet/default_config/ajet_default.yaml‎
Lines changed: 22 additions & 14 deletions
diff --git a/‎ajet/task_rollout/async_llm_bridge.py‎
Lines changed: 1 addition & 1 deletion b/‎ajet/task_rollout/async_llm_bridge.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ajet/task_rollout/single_worker.py‎
Lines changed: 1 addition & 1 deletion b/‎ajet/task_rollout/single_worker.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ajet/tuner.py‎
Lines changed: 5 additions & 5 deletions b/‎ajet/tuner.py‎
Lines changed: 5 additions & 5 deletions
@@ -51,9 +51,9 @@ def patched_trainer_get_actor(cls, config: Config):
     Trainer.get_actor = classmethod(patched_trainer_get_actor)
 
     ajet_config = get_ajet_config_from_trinity_side()
-    if ajet_config.ajet.enable_experimental_reverse_proxy:
+    if ajet_config.ajet.enable_experimental_interchange_server:
         from ajet.tuner_lib.weight_tuner.experimental.as_oai_model_server import start_interchange_server
-        start_interchange_server(ajet_config.ajet.experiment_dir)
+        start_interchange_server(ajet_config)
 
 
 if __name__ == "__main__":
 
@@ -246,9 +246,9 @@ def run(self, config):
 
         from ajet.backbone.trainer_verl import AjetRayPPOTrainer
 
-        if config.ajet.enable_experimental_reverse_proxy:
+        if config.ajet.enable_experimental_interchange_server:
             from ajet.tuner_lib.weight_tuner.experimental.as_oai_model_server import start_interchange_server
-            start_interchange_server(config.ajet.experiment_dir)
+            start_interchange_server(config)
 
         # Initialize the PPO trainer.
         trainer = AjetRayPPOTrainer(
 
@@ -184,9 +184,9 @@ def main(config):
     os.environ.update(runtime_env["env_vars"])
     # atexit.register(lambda: print("Process exiting, performing cleanup..."))
 
-    if config.ajet.enable_experimental_reverse_proxy:
+    if config.ajet.enable_experimental_interchange_server:
         from ajet.tuner_lib.weight_tuner.experimental.as_oai_model_server import start_interchange_server
-        start_interchange_server(config.ajet.experiment_dir)
+        start_interchange_server(config)
 
     def companion_launch():
         import torch
 
@@ -116,7 +116,7 @@ def __init__(
 
     async def run_async(self):
         ajet_config = get_ajet_config_from_trinity_side()
-        if ajet_config.ajet.enable_experimental_reverse_proxy:
+        if ajet_config.ajet.enable_experimental_interchange_server:
             raise NotImplementedError(
                 "The experimental reverse proxy is not supported in Trinity backbone yet."
             )
 
@@ -835,7 +835,7 @@ def fit(self):  # noqa: C901
                 self.global_steps += 1
 
                 # # when enabled oai request interchange, we need to clear the cache from time to time
-                # if self.config.ajet.enable_experimental_reverse_proxy:
+                # if self.config.ajet.enable_experimental_interchange_server:
                 #     from ajet.tuner_lib.weight_tuner.experimental.as_oai_model_server import ensure_dat_interchange_server_cache_clear
                 #     ensure_dat_interchange_server_cache_clear()
 
 
@@ -1,3 +1,8 @@
+"""
+Process level warm up
+"""
+
+
 import asyncio
 import logging
 import os
@@ -32,6 +37,9 @@ def init_parallel_rollout_logger(experiment_name):
 
     target_logger = logging.getLogger("vllm.entrypoints.openai.tool_parsers.hermes_tool_parser")
     target_logger.setLevel(logging.CRITICAL)
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+
+
 
 def warm_up_process(config):
     """
 
@@ -6,19 +6,6 @@ ajet:
   backbone: debug # `debug` or `trinity` or `verl`
 
 
-  # the experimental reverse proxy feature that allows `tuner.as_oai_baseurl_apikey` feature
-  enable_experimental_reverse_proxy: False
-
-  # submit llm infer submit method
-  llm_infer_submit_method: "async" # options: "sync", "async"
-
-  task_runner:
-    wrapper_type: "asyncio-with-gc"
-    wrapper_multiprocessing_timeout: 3600  # in seconds
-    # - wrapper_type: "asyncio-with-gc":  safe, with periodic garbage collection to prevent event loop leaks (recommended)
-    # - wrapper_type: "asyncio":          fast, but may cause event loop leak in long run
-    # - wrapper_type: "multi-processing": safe, but resource consuming
-
   model:
     # which model should be trained
     path: /path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct
@@ -42,7 +29,7 @@ ajet:
     force_disable_toolcalls: False
 
     # maximum number of parallel environments / simulate workers
-    max_env_worker: 128
+    max_env_worker: 64
 
     # step reward gamma (experimental, do not change)
     gamma: 1.0
@@ -293,7 +280,28 @@ ajet:
     save_trajectory_as_json_file: False
 
 
+  # experimental interchange server (reverse proxy) that enables the `tuner.as_oai_baseurl_apikey` feature
+  enable_experimental_interchange_server: False
+  interchange_server:
+    interchange_method: 'ipc' # options: 'tcp' (multi-node) or 'ipc' (single node)
+    interchange_server_port: 'auto'
+    num_fastapi_process: 2  # 1, 2 or 4 is fine
+    max_fastapi_threads: 128  # 64 or 128 is fine
+    max_inference_tracker_threads: 64 # recommended to equal `ajet.rollout.max_env_worker`
+
+
+  task_runner:
+    # how LLM inference requests are submitted
+    llm_infer_submit_method: "async" # options: "sync", "async"
+
+    # how to wrap the user-defined workflow
+    wrapper_type: "asyncio-with-gc"
+    # - wrapper_type: "asyncio-with-gc":  safe, with periodic garbage collection to prevent event loop leaks (recommended)
+    # - wrapper_type: "asyncio":          fast, but may cause event loop leak in long run
+    # - wrapper_type: "multi-processing": safe, but resource consuming
 
+    # when `wrapper_type` is `multi-processing`, the timeout for each task
+    wrapper_multiprocessing_timeout: 3600  # in seconds
 
   # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN.
   execute_test: False        # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN.
 
@@ -534,7 +534,7 @@ async def run_infer(
             #     otherwise, for abnormal output, can still proceed, but we do not track output anymore
 
         # run llm inference ✨
-        if self.config.ajet.llm_infer_submit_method == "sync":
+        if self.config.ajet.task_runner.llm_infer_submit_method == "sync":
             llm_output = await asyncio.to_thread(
                 self.llm_inference_fn, converted_message, custom_sampling_params, tools
             )
 
@@ -85,7 +85,7 @@ def rollout_env_worker(
         """
         sampling_params = get_sample_params(mode, self.config)
 
-        if self.config.ajet.llm_infer_submit_method == "sync":
+        if self.config.ajet.task_runner.llm_infer_submit_method == "sync":
             llm_inference_fn = self.async_llm_bridge.get_llm_inference_fn_sync(
                 sampling_params=sampling_params
             )
 
@@ -26,7 +26,7 @@ def __init__(
         self.context_tracker = context_tracker
         self.llm_inference_fn = llm_inference_fn
         self.target2proxy_registry: dict[str, dict[str,TunerTypeUnion]] = {}
-        if config.ajet.enable_experimental_reverse_proxy:
+        if config.ajet.enable_experimental_interchange_server:
             self.proxy_client_started = False
 
 
@@ -104,10 +104,10 @@ def as_oai_baseurl_apikey(
             ```
         """
 
-        assert self.config.ajet.enable_experimental_reverse_proxy, "Please enable `ajet.enable_experimental_reverse_proxy` in yaml config to use `as_oai_baseurl_apikey` feature."
+        assert self.config.ajet.enable_experimental_interchange_server, "Please enable `ajet.enable_experimental_interchange_server` in yaml config to use `as_oai_baseurl_apikey` feature."
         if self.proxy_client_started is False:
-            self._enable_experimental_interchange_server(self.llm_inference_fn)
             self.proxy_client_started = True
+            self._enable_experimental_interchange_server(self.llm_inference_fn)
         baseurl_apikey_model = OpenaiClientBaseUrlTuner(
             config=self.config,
             context_tracker=self.context_tracker,
@@ -171,7 +171,7 @@ def get_context_tracker(self) -> MultiAgentContextTracker:
 
     def _enable_experimental_interchange_server(self, llm_inference_fn):
         # experimental reverse proxy start
-        if self.config.ajet.enable_experimental_reverse_proxy:
+        if self.config.ajet.enable_experimental_interchange_server:
             from ajet.tuner_lib.weight_tuner.experimental.as_oai_model_client import InterchangeClient
             self.interchange_client = InterchangeClient(
                 episode_uuid=self.context_tracker.episode_uuid,
@@ -184,6 +184,6 @@ def _enable_experimental_interchange_server(self, llm_inference_fn):
 
     def terminate_episode(self):
         # experimental reverse proxy cleanup
-        if self.config.ajet.enable_experimental_reverse_proxy:
+        if self.config.ajet.enable_experimental_interchange_server:
             if (self.proxy_client_started is True) and hasattr(self, "interchange_client"):
                 self.interchange_client._should_terminate = True
Original file line number	Diff line number	Diff line change
`@@ -116,7 +116,7 @@ def __init__(`
`116`	`116`
`117`	`117`	`async def run_async(self):`
`118`	`118`	`ajet_config = get_ajet_config_from_trinity_side()`
`119`		`- if ajet_config.ajet.enable_experimental_reverse_proxy:`
	`119`	`+ if ajet_config.ajet.enable_experimental_interchange_server:`
`120`	`120`	`raise NotImplementedError(`
`121`	`121`	`"The experimental reverse proxy is not supported in Trinity backbone yet."`
`122`	`122`	`)`
Original file line number	Diff line number	Diff line change
`@@ -534,7 +534,7 @@ async def run_infer(`
`534`	`534`	`# otherwise, for abnormal output, can still proceed, but we do not track output anymore`
`535`	`535`
`536`	`536`	`# run llm inference ✨`
`537`		`- if self.config.ajet.llm_infer_submit_method == "sync":`
	`537`	`+ if self.config.ajet.task_runner.llm_infer_submit_method == "sync":`
`538`	`538`	`llm_output = await asyncio.to_thread(`
`539`	`539`	`self.llm_inference_fn, converted_message, custom_sampling_params, tools`
`540`	`540`	`)`
Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,7 @@ def rollout_env_worker(`
`85`	`85`	`"""`
`86`	`86`	`sampling_params = get_sample_params(mode, self.config)`
`87`	`87`
`88`		`- if self.config.ajet.llm_infer_submit_method == "sync":`
	`88`	`+ if self.config.ajet.task_runner.llm_infer_submit_method == "sync":`
`89`	`89`	`llm_inference_fn = self.async_llm_bridge.get_llm_inference_fn_sync(`
`90`	`90`	`sampling_params=sampling_params`
`91`	`91`	`)`