|
| 1 | + |
| 2 | +import atexit |
| 3 | +import json |
| 4 | +import requests |
| 5 | +import zmq |
| 6 | +import os |
| 7 | +import time |
| 8 | +from ajet import AjetTuner |
| 9 | +from ajet import WorkflowOutput |
| 10 | +from ajet.context_tracker.multiagent_tracking import ( |
| 11 | + MultiAgentContextTracker, |
| 12 | +) |
| 13 | +from ajet.context_tracker.basic_tracker import BaseContextTracker |
| 14 | +from ajet.schema.task import WorkflowTask |
| 15 | +from ajet.schema.trajectory import Reward |
| 16 | +from ajet.task_runner.base_runner import BaseAgentRunner |
| 17 | +from ajet.utils.networking import find_free_port |
| 18 | +from loguru import logger |
| 19 | +from ajet import Workflow |
| 20 | + |
# One ZeroMQ context for the whole module; it is terminated when the
# interpreter exits so that the process can shut down cleanly.
context = zmq.Context()
atexit.register(context.term)
| 23 | + |
class TinkerScriptRunner(BaseAgentRunner):
    """Runner that hands an episode to the TinkerScript interchange server.

    The runner registers the episode (together with an OpenAI-compatible
    base URL / API key) over HTTP, then blocks on a ZeroMQ REP socket until
    a serialized ``WorkflowOutput`` is delivered for that episode.
    """

    def get_zmq_socket(self, episode_uuid: str) -> str:
        """Return the ZeroMQ address on which the episode result is awaited.

        Note: despite the method name, this returns an address string, not a
        socket object.

        Args:
            episode_uuid: Identifier of the episode; used to build a unique
                socket file name in ``ipc`` mode.

        Returns:
            A ``tcp://...`` or ``ipc://...`` endpoint string, depending on
            ``config.ajet.interchange_server.interchange_method``.

        Raises:
            RuntimeError: If the configured interchange method is neither
                ``'tcp'`` nor ``'ipc'``.
        """
        interchange_method = self.config.ajet.interchange_server.interchange_method
        if interchange_method == 'tcp':
            master_node_ip = os.getenv("MASTER_NODE_IP", "localhost")
            return f"tcp://{master_node_ip}:{find_free_port()}"
        if interchange_method == 'ipc':
            ipc_path = f"/tmp/ajet/{episode_uuid}-workflow.sock"
            # Binding an ipc endpoint fails when the parent directory is
            # missing, so make sure it exists before handing the path out.
            os.makedirs(os.path.dirname(ipc_path), exist_ok=True)
            return f"ipc://{ipc_path}"
        raise RuntimeError(f"Unknown interchange_method: {interchange_method}")

    def get_interchange_server_url(self) -> str:
        """Return the HTTP base URL of the interchange server.

        The port comes from the ``AJET_DAT_INTERCHANGE_PORT`` environment
        variable unless ``config.ajet.interchange_server.interchange_server_port``
        is set to something other than ``'auto'``, in which case the
        configured value wins. The host comes from ``MASTER_NODE_IP``
        (default ``localhost``).

        Returns:
            A URL of the form ``http://<host>:<port>``.

        Raises:
            AssertionError: If no port could be determined.
        """
        port = os.getenv("AJET_DAT_INTERCHANGE_PORT")
        configured_port = self.config.ajet.interchange_server.interchange_server_port
        if configured_port != 'auto':
            # int() round-trip validates that the configured value is numeric.
            port = str(int(configured_port))
        assert port is not None, "AJET_DAT_INTERCHANGE_PORT env var must be set"
        master_node_ip = os.getenv("MASTER_NODE_IP", "localhost")
        return f"http://{master_node_ip}:{port}"

    def register_episode_and_wait_output(self, episode_uuid: str, openai_base_url: str, openai_api_key: str) -> WorkflowOutput:
        """Register the episode with the TinkerScript server and wait for its output.

        Args:
            episode_uuid: Identifier of the episode being registered.
            openai_base_url: Base URL forwarded in the registration request.
            openai_api_key: API key forwarded in the registration request.

        Returns:
            The ``WorkflowOutput`` built from the JSON message received on the
            ZeroMQ result socket.

        Raises:
            RuntimeError: If the server answers the registration request
                without a truthy ``success`` field.
        """
        from ajet.tuner_lib.weight_tuner.experimental.as_tinkerscript_server import RegisterEpisodeRequest

        zmq_listen_result_addr = self.get_zmq_socket(episode_uuid)
        interchange_http_addr = self.get_interchange_server_url()
        rer = RegisterEpisodeRequest(
            episode_uuid=episode_uuid,
            openai_base_url=openai_base_url,
            openai_api_key=openai_api_key,
            zmq_listen_result_addr=zmq_listen_result_addr,
        )
        logger.info(f"zmq_listen_result_addr: {zmq_listen_result_addr}, interchange_http_addr: {interchange_http_addr}")

        # Use the shared module-level context instead of creating a new one
        # per episode, and always close the socket so nothing is leaked.
        zmq_socket = context.socket(zmq.REP)
        try:
            # Bind before registering: the address (and, in tcp mode, the
            # freshly picked port) is claimed before it is advertised, and
            # before the potentially long HTTP retry loop below.
            zmq_socket.bind(zmq_listen_result_addr)

            # Send the registration request; transport/HTTP failures are
            # retried indefinitely with a fixed 5 second pause.
            while True:
                try:
                    response = requests.post(
                        f"{interchange_http_addr}/register_episode",
                        json=rer.model_dump(),  # model_dump() is the Pydantic v2 API
                        timeout=30
                    )
                    response.raise_for_status()
                    result = response.json()
                    if not result.get('success'):
                        # Not a RequestException, so this is not retried.
                        raise RuntimeError(f"Failed to register episode {episode_uuid}")
                    logger.info(f"Successfully registered episode {episode_uuid}")
                    break
                except requests.RequestException as e:
                    logger.error(f"Error registering episode {episode_uuid}: {e}. Retrying...")
                    time.sleep(5)

            # Block until the workflow output arrives, then acknowledge it
            # (a REP socket must reply before it can receive again).
            message = zmq_socket.recv_string()
            logger.success(f"Received workflow output for episode {episode_uuid}")
            zmq_socket.send_string("ack")
            return WorkflowOutput(**json.loads(message))
        finally:
            zmq_socket.close()

    def execute(self, workflow_task: WorkflowTask) -> BaseContextTracker:
        """Run one workflow task through TinkerScript and return its tracker.

        Builds a ``MultiAgentContextTracker`` and an ``AjetTuner`` for the
        task, registers the episode, waits for the workflow output, converts
        it into a ``Reward`` and feeds that reward to the tracker.

        Args:
            workflow_task: The task to execute; its ``gym_env`` attribute is
                reset to ``None`` as a side effect.

        Returns:
            The context tracker after reward processing and group merging.

        Raises:
            ValueError: If the received workflow output has no reward.
            AssertionError: If the reward is a list (step rewards are not
                supported yet).
        """
        observation_window = workflow_task.observation_window
        task_thread_index = workflow_task.task_thread_index

        hooks = self.runner_hooks(
            observation_window=observation_window,
            task_thread_index=task_thread_index,
            workflow_task=workflow_task,
        )
        context_tracker = MultiAgentContextTracker(
            llm_inference_fn=self.llm_inference_fn,
            tokenizer=self.tokenizer,
            config=self.config,
            workflow_task=workflow_task,
            **hooks,
        )
        tuner = AjetTuner(
            context_tracker=context_tracker,
            llm_inference_fn=self.llm_inference_fn,
            workflow_cls=Workflow,
            config=self.config,
        )

        baseurl_apikey = tuner.as_oai_baseurl_apikey()
        workflow_output: WorkflowOutput = self.register_episode_and_wait_output(
            episode_uuid=context_tracker.episode_uuid,
            openai_base_url=baseurl_apikey.base_url,
            openai_api_key=baseurl_apikey.api_key,
        )

        if workflow_output.reward is None:
            raise ValueError("workflow_output.reward is None in TinkerScriptRunner, this is currently not allowed.")
        raw_reward = workflow_output.reward
        is_success = workflow_output.is_success

        workflow_task.gym_env = None  # clear gym env client reference to avoid serialization issue

        assert not isinstance(
            raw_reward, list
        ), "AgentJet will support step reward in future versions."

        # register reward
        # TODO: support multi-step reward
        reward = Reward(
            raw_reward=raw_reward,
            raw_step_reward=None,  # "AgentJet will support step reward in future versions."
            success_rate=1.0 if is_success else 0.0,
            madness=0,
            description="",
        )
        context_tracker.process_reward(reward)
        # generate token before merging
        context_tracker.group_merge()
        # after merging, process and align reward again
        context_tracker.process_reward(reward)
        # mark the thread as ended
        observation_window["step"][task_thread_index] = -1
        tuner.terminate_episode()
        context_tracker.log_metrics = workflow_output.log_metrics
        return context_tracker
0 commit comments