|
6 | 6 | backbone: debug # `debug` or `trinity` or `verl` |
7 | 7 |
|
8 | 8 |
|
9 | | - # the experimental reverse proxy feature that allows `tuner.as_oai_baseurl_apikey` feature |
10 | | - enable_experimental_reverse_proxy: False |
11 | | - |
12 | | - # submit llm infer submit method |
13 | | - llm_infer_submit_method: "async" # options: "sync", "async" |
14 | | - |
15 | | - task_runner: |
16 | | - wrapper_type: "asyncio-with-gc" |
17 | | - wrapper_multiprocessing_timeout: 3600 # in seconds |
18 | | - # - wrapper_type: "asyncio-with-gc": safe, with periodic garbage collection to prevent event loop leaks (recommended) |
19 | | - # - wrapper_type: "asyncio": fast, but may cause event loop leak in long run |
20 | | - # - wrapper_type: "multi-processing": safe, but resource consuming |
21 | | - |
22 | 9 | model: |
23 | 10 | # which model should be trained |
24 | 11 | path: /path/to/model/such/as/Qwen/Qwen2___5-14B-Instruct |
|
42 | 29 | force_disable_toolcalls: False |
43 | 30 |
|
44 | 31 | # maximum number of parallel environments / simulation workers |
45 | | - max_env_worker: 128 |
| 32 | + max_env_worker: 64 |
46 | 33 |
|
47 | 34 | # step reward gamma (experimental, do not change) |
48 | 35 | gamma: 1.0 |
@@ -293,7 +280,28 @@ ajet: |
293 | 280 | save_trajectory_as_json_file: False |
294 | 281 |
|
295 | 282 |
|
| 283 | + # experimental interchange server (reverse proxy) that enables the `tuner.as_oai_baseurl_apikey` feature |
| 284 | + enable_experimental_interchange_server: False |
| 285 | + interchange_server: |
| 286 | + interchange_method: 'ipc' # options: 'tcp' (multi-node) or 'ipc' (single node) |
| 287 | + interchange_server_port: 'auto' |
| 288 | + num_fastapi_process: 2 # 1, 2 or 4 is fine |
| 289 | + max_fastapi_threads: 128 # 64 or 128 is fine |
| 290 | + max_inference_tracker_threads: 64 # recommended to equal `ajet.rollout.max_env_worker` |
| 291 | + |
| 292 | + |
| 293 | + task_runner: |
| 294 | + # how LLM inference requests are submitted |
| 295 | + llm_infer_submit_method: "async" # options: "sync", "async" |
| 296 | + |
| 297 | + # how to wrap the user-defined workflow |
| 298 | + wrapper_type: "asyncio-with-gc" |
| 299 | + # - wrapper_type: "asyncio-with-gc": safe, with periodic garbage collection to prevent event loop leaks (recommended) |
| 300 | + # - wrapper_type: "asyncio": fast, but may leak event loops in long runs |
| 301 | + # - wrapper_type: "multi-processing": safe, but resource consuming |
296 | 302 |
|
| 303 | + # when `wrapper_type` is `multi-processing`, the timeout for each task |
| 304 | + wrapper_multiprocessing_timeout: 3600 # in seconds |
297 | 305 |
|
298 | 306 | # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
299 | 307 | execute_test: False # DO NOT EDIT, FOR ROBOT TESTING PURPOSE ONLY. NOT FOR HUMAN. |
|
0 commit comments