Skip to content

Commit 62c8a10

Browse files
committed
docs: add implementation examples for AgentScope, OpenAI, raw HTTP, and langchain in math agent and learn2ask tutorials
1 parent 13a7728 commit 62c8a10

File tree

2 files changed

+253
-53
lines changed

2 files changed

+253
-53
lines changed

docs/en/example_learning_to_ask.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,68 @@ At the code level, everything is implemented in `tutorial/example_learn2ask/lear
9999
* `ExampleLearn2Ask` defines the workflow: how the dialogue context is converted into the agent’s prompt/input, and what output format is expected (one follow-up question, optionally with choices).
100100
* `reward_fn` defines how to convert the judge’s feedback into a scalar reward used for training.
101101

102+
We provide two implementations of the agent based on AgentScope and langchain:
103+
104+
=== "AgentScope"
105+
106+
```python
107+
# create the agent
108+
self.agent = ReActAgent(
109+
name="math_react_agent",
110+
sys_prompt=system_prompt,
111+
model=tuner.as_agentscope_model(),
112+
formatter=DashScopeChatFormatter(),
113+
toolkit=None,
114+
memory=InMemoryMemory(),
115+
max_iters=1,
116+
)
117+
self.agent.set_console_output_enabled(False)
118+
119+
    # convert the messages to AgentScope format and send to the agent
120+
msg = [
121+
# Msg("system", system_prompt, role="system"),
122+
*[Msg(name=x["role"], content=x["content"], role=x["role"]) for x in messages]
123+
]
124+
result = await self.agent.reply(msg)
125+
if isinstance(result.content, str):
126+
response = result.content
127+
elif isinstance(result.content, list):
128+
response = result.content[0]["text"] # type: ignore
129+
else:
130+
raise NotImplementedError(f"do not know how to handle {type(result.content)}")
131+
reward = await reward_fn_with_semaphore(msg, response, truth_action, truth_info)
132+
return WorkflowOutput(reward=reward)
133+
```
134+
135+
=== "Langchain"
136+
137+
```python
138+
# get the trainable llm
139+
llm_info=tuner.as_oai_baseurl_apikey()
140+
141+
# create the langchain agent
142+
llm=ChatOpenAI(
143+
base_url=llm_info.base_url,
144+
api_key=lambda:llm_info.api_key,
145+
)
146+
agent=create_agent(
147+
model=llm,
148+
system_prompt=system_prompt,
149+
)
150+
151+
# build messages and send to the agent
152+
msg=[
153+
{"role": x["role"], "content": x["content"]} for x in messages
154+
]
155+
result = agent.invoke({
156+
"messages": msg, # type: ignore
157+
})
158+
159+
response = result["messages"][-1].content
160+
reward = await reward_fn_with_semaphore(msg, response, truth_action, truth_info)
161+
return WorkflowOutput(reward=reward)
162+
```
163+
102164
#### 3.4 Reward
103165

104166
`llm_reward` is the LLM-as-a-judge called inside `reward_fn` to score the model output. The evaluation follows these rules:

docs/en/example_math_agent.md

Lines changed: 191 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,74 @@ Compare `final_answer` with reference, compute `raw_reward` and `is_success`.</l
102102

103103
Most wiring happens in `tutorial/example_math_agent/math_agent.yaml`:
104104

105-
```yaml title="math_agent.yaml"
106-
ajet:
107-
task_reader:
108-
type: huggingface_dat_repo # also supports: dataset_file / env_service
105+
=== "AgentScope"
109106

110-
rollout:
111-
user_workflow: tutorial.example_math_agent.math_agent->ExampleMathLearn
107+
```yaml title="math_agent.yaml"
108+
ajet:
109+
task_reader:
110+
type: huggingface_dat_repo # also supports: dataset_file / env_service
112111

113-
task_judge:
114-
judge_protocol: tutorial.example_math_agent.math_answer_as_judge->MathAnswerAndLlmAsJudge
112+
rollout:
113+
user_workflow: tutorial.example_math_agent.math_agent->ExampleMathLearn
114+
115+
task_judge:
116+
judge_protocol: tutorial.example_math_agent.math_answer_as_judge->MathAnswerAndLlmAsJudge
117+
118+
model:
119+
path: YOUR_MODEL_PATH
120+
```
121+
122+
=== "OpenAI"
123+
124+
```yaml title="math_agent.yaml"
125+
ajet:
126+
task_reader:
127+
type: huggingface_dat_repo # also supports: dataset_file / env_service
128+
129+
rollout:
130+
user_workflow: tutorial.example_math_agent.math_agent_oai_sdk->ExampleMathLearn
131+
132+
task_judge:
133+
judge_protocol: tutorial.example_math_agent.math_answer_as_judge->MathAnswerAndLlmAsJudge
134+
135+
model:
136+
path: YOUR_MODEL_PATH
137+
```
138+
139+
=== "Raw HTTP"
140+
141+
```yaml title="math_agent.yaml"
142+
ajet:
143+
task_reader:
144+
type: huggingface_dat_repo # also supports: dataset_file / env_service
145+
146+
rollout:
147+
user_workflow: tutorial.example_math_agent.math_agent_raw_http->ExampleMathLearn
148+
149+
task_judge:
150+
judge_protocol: tutorial.example_math_agent.math_answer_as_judge->MathAnswerAndLlmAsJudge
151+
152+
model:
153+
path: YOUR_MODEL_PATH
154+
```
155+
156+
=== "langchain"
157+
158+
```yaml title="math_agent.yaml"
159+
ajet:
160+
task_reader:
161+
type: huggingface_dat_repo # also supports: dataset_file / env_service
162+
163+
rollout:
164+
user_workflow: tutorial.example_math_agent.math_agent_langchain->ExampleMathLearn
165+
166+
task_judge:
167+
judge_protocol: tutorial.example_math_agent.math_answer_as_judge->MathAnswerAndLlmAsJudge
168+
169+
model:
170+
path: YOUR_MODEL_PATH
171+
```
115172

116-
model:
117-
path: YOUR_MODEL_PATH
118-
```
119173

120174
| Field | Description |
121175
|-------|-------------|
@@ -151,50 +205,134 @@ ajet:
151205
return WorkflowOutput(reward=None, metadata={"final_answer": final_answer})
152206
```
153207

208+
=== "OpenAI"
209+
210+
```python title="Workflow Sketch"
211+
client = tuner.as_raw_openai_sdk_client()
212+
213+
# call 1: get response with tool call
214+
messages = [
215+
{ "role": "system", "content": self.system_prompt },
216+
{ "role": "user", "content": query }
217+
]
218+
reply_message: ChatCompletion = await client.chat.completions.create(messages=messages, tools=self.available_functions)
219+
if (reply_message.choices[0].message.content):
220+
messages.append({
221+
"role": "assistant",
222+
"content": reply_message.choices[0].message.content
223+
})
224+
225+
# If the model called a tool
226+
if (reply_message.choices[0].message) and (reply_message.choices[0].message.tool_calls):
227+
tool_calls: list[ChatCompletionMessageToolCall] = reply_message.choices[0].message.tool_calls
228+
for tool_call in tool_calls:
229+
if tool_call.function.name == "execute_python_code":
230+
arguments = json.loads(tool_call.function.arguments)
231+
232+
def sync_wrapper():
233+
import subprocess
234+
import sys
235+
process = subprocess.run(
236+
[sys.executable, "-c", arguments["code"]],
237+
timeout=arguments.get("timeout", 300),
238+
capture_output=True,
239+
text=True
240+
)
241+
return process.stdout
242+
243+
result = await asyncio.to_thread(sync_wrapper)
244+
tool_result_message = {
245+
"role": "tool",
246+
"tool_call_id": tool_call.id,
247+
"name": tool_call.function.name,
248+
"content": json.dumps({
249+
"return_code": str(result),
250+
})
251+
}
252+
messages.append(tool_result_message)
253+
254+
# Step 3: Make a follow-up API call with the tool result
255+
final_response: ChatCompletion = await client.chat.completions.create(
256+
messages=messages,
257+
)
258+
final_stage_response = final_response.choices[0].message.content
259+
else:
260+
final_stage_response = reply_message.choices[0].message.content
261+
262+
263+
return WorkflowOutput(reward=None, metadata={"final_answer": final_stage_response})
264+
```
265+
266+
267+
=== "Raw HTTP"
268+
269+
```python title="raw http"
270+
url_and_apikey = tuner.as_oai_baseurl_apikey()
271+
base_url = url_and_apikey.base_url
272+
api_key = url_and_apikey.api_key
273+
274+
# take out query
275+
query = workflow_task.task.main_query
276+
277+
messages = [
278+
{
279+
"role": "system",
280+
"content": self.system_prompt
281+
},
282+
{
283+
"role": "user",
284+
"content": query
285+
}
286+
]
287+
288+
# use raw http requests (non-streaming) to get response
289+
response = requests.post(
290+
f"{base_url}/chat/completions",
291+
json={
292+
"model": "fill_whatever_model", # Of course, this `model` field will be ignored.
293+
"messages": messages,
294+
},
295+
headers={
296+
"Authorization": f"Bearer {api_key}"
297+
}
298+
)
299+
final_answer = response.json()['choices'][0]['message']['content']
300+
return WorkflowOutput(reward=None, metadata={"final_answer": final_answer})
301+
```
302+
303+
154304
=== "Langchain"
155305

156-
```python
157-
158-
class ExampleMathLearn(Workflow):
159-
160-
name: str = "math_agent_workflow"
161-
system_prompt: str = dedent("""
162-
You are an agent specialized in solving math problems.
163-
Please solve the math problem given to you.
164-
You can write and execute Python code to perform calculation or verify your answer.
165-
You should return your final answer within \\boxed{{}}.
166-
""")
167-
168-
async def execute(self, workflow_task: WorkflowTask, tuner: AjetTuner) -> WorkflowOutput: # type: ignore
169-
# tuner to api key
170-
url_and_apikey = tuner.as_oai_baseurl_apikey()
171-
base_url = url_and_apikey.base_url
172-
api_key = url_and_apikey.api_key
173-
174-
from langchain_openai import ChatOpenAI
175-
llm=ChatOpenAI(
176-
base_url=base_url,
177-
api_key=lambda:api_key,
178-
)
179-
agent=create_agent(
180-
model=llm,
181-
system_prompt=self.system_prompt,
182-
)
183-
184-
# take out query
185-
query = workflow_task.task.main_query
186-
187-
response = agent.invoke({
188-
"messages": [
189-
{
190-
"role": "user",
191-
"content": query
192-
}
193-
],
194-
})
195-
196-
final_answer = response['messages'][-1].content
197-
return WorkflowOutput(reward=None, metadata={"final_answer": final_answer})
306+
```python title="langchain"
307+
# tuner to api key
308+
url_and_apikey = tuner.as_oai_baseurl_apikey()
309+
base_url = url_and_apikey.base_url
310+
api_key = url_and_apikey.api_key
311+
312+
from langchain_openai import ChatOpenAI
313+
llm=ChatOpenAI(
314+
base_url=base_url,
315+
api_key=lambda:api_key,
316+
)
317+
agent=create_agent(
318+
model=llm,
319+
system_prompt=self.system_prompt,
320+
)
321+
322+
# take out query
323+
query = workflow_task.task.main_query
324+
325+
response = agent.invoke({
326+
"messages": [
327+
{
328+
"role": "user",
329+
"content": query
330+
}
331+
],
332+
})
333+
334+
final_answer = response['messages'][-1].content
335+
return WorkflowOutput(reward=None, metadata={"final_answer": final_answer})
198336
```
199337

200338
!!! warning "Important"

0 commit comments

Comments
 (0)