1+ import os
2+ import json
3+ from ajet .utils .msg_converter import convert_grouped_steps_to_openai_format
4+
5+
def _trajectory_step_dir(split, global_steps):
    """Return the directory that holds the trajectory files of one step.

    The layout is ``<root>/ctx_trackers/<split>/step_<global_steps>`` where
    ``<root>`` is the ``BEST_LOGGER_PATH`` environment variable, falling back
    to ``launcher_record`` when the variable is not set.
    """
    return os.path.join(
        os.environ.get("BEST_LOGGER_PATH", "launcher_record"),
        "ctx_trackers",
        split,
        f"step_{global_steps}",
    )


def _build_trajectory_record(ctx_tracker):
    """Assemble the JSON-serializable record saved for one tracker.

    The record holds the tracker's ``task_id``, its ``tag``, the result of
    ``reward_structure.model_dump()`` and the trajectory in OpenAI message
    format.
    """
    # Prefer the tracker's own conversion method when it provides one;
    # otherwise fall back to the shared module-level converter.
    if hasattr(ctx_tracker, "get_grouped_steps_openai_format"):
        formatted_traj = ctx_tracker.get_grouped_steps_openai_format()
    else:
        formatted_traj = convert_grouped_steps_to_openai_format(ctx_tracker.grouped_steps)

    record = {
        "task_id": ctx_tracker.task_id,
        "task_tag": ctx_tracker.tag,
        "reward_structure": ctx_tracker.reward_structure.model_dump(),
        "traj": formatted_traj,
    }

    # Attach reward_stats from workflow_metadata, when present, next to the
    # dumped reward structure so both end up in the same JSON object.
    workflow_metadata = getattr(ctx_tracker, "workflow_metadata", None)
    if workflow_metadata and "reward_stats" in workflow_metadata:
        record["reward_structure"]["reward_stats"] = workflow_metadata["reward_stats"]
    return record


def _write_trajectory_record(record, save_dir):
    """Write ``record`` to ``<save_dir>/<task_id>.json`` and return that path."""
    # Created lazily (per record) so that an empty tracker list leaves no
    # empty step directory behind.
    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, f"{record['task_id']}.json")
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(record, f, ensure_ascii=False, indent=2)
    return file_path


def save_train_trajectory(ctx_trackers, global_steps) -> None:
    """Save training ctx_trackers to JSON files.

    Each tracker's ``tag`` attribute is overwritten from its raw reward
    before saving: ``"success"`` when the reward is >= 1, ``"failure"`` when
    it is exactly 0, and ``"half_success"`` for every other value.

    Args:
        ctx_trackers: Iterable of tracker objects. Each must expose
            ``task_id``, ``reward_structure`` (with ``raw_reward`` and
            ``model_dump()``), and either ``get_grouped_steps_openai_format()``
            or ``grouped_steps``. ``workflow_metadata`` is optional.
        global_steps: Step identifier used in the output directory name.

    Files are written to
    ``<BEST_LOGGER_PATH>/ctx_trackers/train/step_<global_steps>/<task_id>.json``.
    A later tracker with the same ``task_id`` overwrites the earlier file.
    """
    save_dir = _trajectory_step_dir("train", global_steps)
    for ctx_tracker in ctx_trackers:
        reward = ctx_tracker.reward_structure.raw_reward
        if reward >= 1:
            ctx_tracker.tag = "success"
        elif reward == 0:
            ctx_tracker.tag = "failure"
        else:
            ctx_tracker.tag = "half_success"

        _write_trajectory_record(_build_trajectory_record(ctx_tracker), save_dir)


def save_eval_trajectory(ctx_trackers, global_steps) -> None:
    """Save evaluation ctx_trackers to JSON files.

    Unlike the training variant, the tracker's existing ``tag`` is saved as
    is, and the path of every written file is printed.

    Args:
        ctx_trackers: Iterable of tracker objects. Each must expose
            ``task_id``, ``tag``, ``reward_structure`` (with ``model_dump()``),
            and either ``get_grouped_steps_openai_format()`` or
            ``grouped_steps``. ``workflow_metadata`` is optional.
        global_steps: Step identifier used in the output directory name.

    Files are written to
    ``<BEST_LOGGER_PATH>/ctx_trackers/val/step_<global_steps>/<task_id>.json``.
    A later tracker with the same ``task_id`` overwrites the earlier file.
    """
    save_dir = _trajectory_step_dir("val", global_steps)
    for ctx_tracker in ctx_trackers:
        file_path = _write_trajectory_record(_build_trajectory_record(ctx_tracker), save_dir)
        print(f"Saved trajectory to {file_path}")
0 commit comments