-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathdeep_finance_template.yaml
More file actions
88 lines (86 loc) · 3.31 KB
/
Copy pathdeep_finance_template.yaml
File metadata and controls
88 lines (86 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# ------------------ Main configuration ------------------
# {{NAME}} tokens are template placeholders. Presumably they are replaced as
# plain text before this file is parsed as YAML — confirm against the launcher.
ajet:
  project_name: ajet_deep_finance
  experiment_name: "{{SUFFIX}}"
  # Judge settings (nested structure, read as self.config.ajet.judge.*)
  judge:
    openjudge_llm: {{OPENJUDGE_LLM}}  # OpenJudge model
    rm_llm: {{RM_LLM}}  # RM Gallery model
    concurrency: {{JUDGE_CONCURRENCY}}  # judge concurrency
    train_ref_ans_path: {{TRAIN_REF_ANS_PATH}}  # training-set reference answers
    val_ref_ans_path: {{VAL_REF_ANS_PATH}}  # validation-set reference answers
    # OpenJudge weights
    report_resolution_weight: {{REPORT_RESOLUTION_WEIGHT}}  # report quality
    trajectory_faithfulness_weight: {{TRAJECTORY_FAITHFULNESS_WEIGHT}}  # factual accuracy
    citation_audit_weight: {{CITATION_AUDIT_WEIGHT}}  # citation audit (coverage + authenticity)
    rm_weight: {{RM_WEIGHT}}  # RM Gallery weight
  task_judge:
    # Evaluate with the local DeepFinanceJudge (decoupled from the remote env_service)
    judge_protocol: tutorial.example_deep_finance.deep_finance_judge->DeepFinanceJudgeByOpenJudge
  model:
    # ✨✨✨✨ Model to be trained
    path: {{MODEL_PATH}}
  trainer_common:
    nnodes: {{NNODES}}
    n_gpus_per_node: 8
    val_before_train: true
    val_pass_n: 8
    save_freq: 10
    test_freq: 2
    total_epochs: 200
    save_trajectory_as_json_file: true
  rollout:
    # ✨✨✨✨ Write and select the agent
    user_workflow: tutorial.example_deep_finance.deep_finance->ExampleDeepResearchProtocol
    force_disable_toolcalls: false
    enable_oversample: false
    tensor_model_parallel_size: 8
    num_repeat: {{NUM_REPEAT}}
    max_env_worker: 64  # raised environment parallelism
    max_num_seqs: 64  # raised vLLM concurrent sequence count
    max_response_length_in_one_turn: 8000
    max_model_len: 50000
    agent_madness_reward: 0.0
    # NOTE(review): YAML reads `None` as the string "None", not as null —
    # confirm the consumer expects that before changing it.
    compute_madness_checklist: None
    multi_turn:
      max_steps: {{NUM_STEPS}}
  interchange_server:
    interchange_method: 'tcp'  # options: 'tcp' (multi-nodes) or 'ipc' (1 node)
  debug:
    debug_max_parallel: 64  # more parallel tasks, to keep the GPUs busy
    debug_first_n_tasks: 100  # more tasks processed
  data:
    train_batch_size: {{TRAIN_BATCH_SIZE}}
    max_prompt_length: 8000
    max_response_length: 41000
  task_reader:
    # Data is loaded from JSON and assembled into init_messages;
    # tool calls go through env_service.
    type: deep_finance
    deep_finance:
      training:
        file_path: {{TRAIN_DATA_PATH}}
      validation:
        file_path: {{VAL_DATA_PATH}}
    # env_service must still be configured (it is used for tool calls)
    env_service:
      env_type: "finworld"
      env_url: "http://127.0.0.1:8080"
      env_action_preference: code
# Trainer overrides: checkpoint directory built from template placeholders.
trainer:
  default_local_dir: "{{CKPT_SAVE_PATH}}/{{PREFIX}}/{{SUFFIX}}"
  # resume_mode: disable  # disable auto-resume and train from scratch
# NOTE(review): tensor_model_parallel_size here has the same value (8) as the
# rollout setting earlier in this file — presumably they must stay in sync; confirm.
actor_rollout_ref:
  rollout:
    tensor_model_parallel_size: 8
    gpu_memory_utilization: 0.8
# ------------------ No changes needed ------------------
# Hydra search path: directories holding the default configs.
hydra:
  searchpath:
    - file://ajet/default_config
    - file://ajet/default_config/verl  # verl only
    - file://ajet/default_config/trinity  # trinity only
# ------------------ No changes needed ------------------
# Hydra defaults list; `_self_` (this file) is listed last.
defaults:
  - verl_default  # verl inherit 1/1
  - trinity_default  # trinity inherit 1/1
  - ajet_default
  - _self_