@@ -278,9 +278,9 @@ ajet --conf tutorial/example_math_agent/math_agent.yaml
278278
279279## Full Code {#full-code}
280280
281- === "` math_agent.py ` - AgentJet Workflow (After Convertion)"
281+ === "`tutorial/example_math_agent/math_agent.py` - AgentJet Workflow (After Conversion)"
282282
283- ```python title="math_agent.py"
283+ ```python
284284 import re
285285 from loguru import logger
286286 from agentscope.message import Msg
@@ -344,9 +344,10 @@ ajet --conf tutorial/example_math_agent/math_agent.yaml
344344 if match: is_success = (match.group(1) == reference_answer)
345345 else: is_success = False
346346 return WorkflowOutput(reward=(1.0 if is_success else 0.0), metadata={"final_answer": final_answer})
347+
347348 ```
348349
349- === "` math_agent.yaml ` - Configuration Yaml"
350+ === "`tutorial/example_math_agent/math_agent.yaml` - Configuration YAML"
350351
351352 ```yaml
352353 # ------------------ main configuration ------------------
@@ -356,25 +357,25 @@ ajet --conf tutorial/example_math_agent/math_agent.yaml
356357 type: huggingface_dat_repo # ✨✨✨✨ `env_service` or `dataset_file` or `huggingface_dat_repo`
357358 # effective when `type: huggingface_dat_repo`
358359 huggingface_dat_repo:
359- dataset_path: 'openai/gsm8k'
360+ dataset_path: 'openai/gsm8k' # '/mnt/data_cpfs/dataset_cache/openai/gsm8k/main'
360361 training_split: "train"
361362 validation_split: "test"
362363
363- task_judge:
364- # ✨✨✨✨ null, because in this certain case, we write reward function together with workflow
365- judge_protocol: null
366-
367364 model:
368365 # ✨✨✨✨ set the model to be trained
369- path: Qwen/Qwen2.5 -7B
366+ path: Qwen/Qwen2___5-7B-Instruct # /mnt/data_cpfs/model_cache/modelscope/hub/Qwen/Qwen2___5-7B-Instruct
370367
371368 rollout:
372- user_workflow: "tutorial. example_math_agent. math_agent->ExampleMathLearn " # ✨✨✨✨ write and select workflow
369+ user_workflow: "tutorial/example_math_agent/math_agent.py->MathToolWorkflow" # ✨✨✨✨ write and select workflow
373370 num_repeat: 6 # grpo `n`
374371 tensor_model_parallel_size: 1 # vllm tp
375372 max_response_length_in_one_turn: 1024
376373 max_model_len: 10000
377374
375+ task_judge:
376+ # ✨✨✨✨ null, because in this certain case, we write reward function together with workflow
377+ judge_protocol: null
378+
378379 data:
379380 train_batch_size: 100
380381 max_prompt_length: 3000
0 commit comments