Skip to content

Commit 87d2184

Browse files
committed
feat(deepfinance): add blog and readme
1 parent c768200 commit 87d2184

File tree

14 files changed

+1292
-1025
lines changed

14 files changed

+1292
-1025
lines changed

ajet/utils/metric_helper/reward_metric_helper.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,12 @@ def compute_reward_metrics(reward_stats_list: List[Dict[str, Any]], prefix: str
8080
metrics[f"{prefix}rewards/penalty_rate"] = float(len(non_zero_penalties) / n * 100) if n > 0 else 0.0
8181

8282
# ========== OpenJudge Metrics ==========
83-
# OpenJudge graders: presentation_quality, grounding, audit, ebtu
83+
# OpenJudge graders: presentation_quality, grounding, planning, audit
8484
openjudge_graders = [
8585
"presentation_quality",
8686
"grounding",
8787
"planning",
8888
"audit",
89-
"ebtu",
9089
]
9190

9291
for grader_name in openjudge_graders:
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# API keys
2+
OPENAI_API_KEY="sk-xxx"
3+
OPENAI_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
4+
RM_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
5+
RM_API_KEY="sk-xxx"
6+
OPENJUDGE_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
7+
OPENJUDGE_API_KEY="sk-xxx"
8+
STRONG_MODEL_API_KEY="sk-xxx"
9+
10+
SWANLAB_API_KEY="xxx"
11+
12+
# data path, save path
13+
ENV_SERVICE_ROOT="/path/to/env_service"
14+
CONDA_PATH="/path/to/conda/conda.sh"
15+
MODEL_PATH="/path/to/base_model"
16+
CKPT_SAVE_PATH="/path/to/ckpt_path"
17+
# 新增:数据文件路径配置
18+
TRAIN_DATA_PATH="/path/to/train_data"
19+
VAL_DATA_PATH="/path/to/val_data"
20+
21+
22+
TRAIN_REF_ANS_PATH="/path/to/train_reference_answer"
23+
VAL_REF_ANS_PATH="/path/to/val_reference_answer"
24+
25+
26+
# Port
27+
ADDR=""
28+
MCP_PORT=""

tutorial/example_deep_finance/blog_cn.md

Lines changed: 531 additions & 0 deletions
Large diffs are not rendered by default.

tutorial/example_deep_finance/blog_en.md

Lines changed: 332 additions & 0 deletions
Large diffs are not rendered by default.

tutorial/example_deep_finance/deep_finance.md

Lines changed: 267 additions & 168 deletions
Large diffs are not rendered by default.

tutorial/example_deep_finance/deep_finance.sh

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,40 @@
11
#!/bin/bash
2-
set -e
2+
set -e
33
#===============================================================================
44
# 1. 配置区域 - 用户只需修改这里
55
#===============================================================================
6-
SUFFIX="newjudge" # 实验后缀,影响所有日志和实验名称
7-
PREFIX="ajet_newjudge" # 实验前缀,影响日志和实验所在文件夹
6+
SUFFIX="deepfinance" # 实验后缀,影响所有日志和实验名称
7+
PREFIX="ajet_deepfinance" # 实验前缀,影响日志和实验所在文件夹
88

99
# OpenJudge 模型配置
10-
OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型
11-
RM_LLM='qwen-max' # RM Gallery 评分模型
12-
JUDGE_CONCURRENCY=10
13-
10+
# finance_llm 可单独配置 Finance 评估使用的模型,留空则复用 OPENJUDGE_LLM
11+
OPENJUDGE_LLM='qwen-flash' # OpenJudge 评分模型(用于通用评估)
12+
FINANCE_LLM='qwen-max' # Finance 评估专用模型(可选,留空则复用 OPENJUDGE_LLM)
13+
JUDGE_CONCURRENCY=20
1414
# 奖励权重配置
15-
RM_WEIGHT=0.5
16-
PRESENTATION_QUALITY_WEIGHT=0.25
17-
GROUNDING_WEIGHT=0.25
18-
CGCV_WEIGHT=0.0 # 不使用 CGCV,设为 0
19-
AUDIT_WEIGHT=0.0 # 不使用 Audit,设为 0
20-
TRACEABILITY_WEIGHT=0.0 # 不使用 Traceability,设为 0
21-
EBTU_WEIGHT=0.0 # 不使用 EBTU,设为 0
22-
15+
# rm_weight 现在对应 FinanceCompositionEvaluator(基于 OpenJudge)
16+
RM_WEIGHT=0.5 # Finance 评估权重(stock_analysis/industry/macro/event/search)
17+
PRESENTATION_QUALITY_WEIGHT=0.2 # 报告呈现质量
18+
GROUNDING_WEIGHT=0.1 # 引用规范性评估
19+
AUDIT_WEIGHT=0.2 # 引用逻辑审计
2320
# 训练参数配置
24-
NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
25-
TRAIN_BATCH_SIZE=32 # 训练batchsize
26-
NUM_STEPS=6 # 每个样本step轮数
21+
NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
22+
TRAIN_BATCH_SIZE=32 # 训练batchsize
23+
NUM_STEPS=10 # 每个样本step轮数
2724
DEEPFINANCE_TOOL_RESULT_MAX_CHARS=10000
2825

2926
# Env Service URL 配置
3027
ENV_SERVICE_URL="http://127.0.0.1:8080" # 环境服务地址
3128

3229
# 主目录(需要更改)
33-
export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new"
30+
export AJET_ROOT="/path/to/agent_jet"
3431

35-
NNODES=${WORLD_SIZE}
32+
NNODES=${WORLD_SIZE:-1}
33+
GPUS_PER_NODE=8
34+
CURRENT_TIME=$(date "+%Y%m%d_%H%M%S")
35+
LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
36+
TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log"
37+
mkdir -p ${LOG_DIR}
3638

3739
# 涉密的配置(API_KEY以及模型、数据位置)从.env读取
3840
cd ${AJET_ROOT}
@@ -48,12 +50,15 @@ if [ -f "$ENV_FILE" ]; then
4850
else
4951
echo -e "\033[31m警告: 找不到 .env 文件: $ENV_FILE\033[0m"
5052
fi
53+
export TRAIN_DATA_PATH="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new/tutorial/example_deep_finance/data/train_merged_all.json"
54+
export TRAIN_REF_ANS_PATH="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new/tutorial/example_deep_finance/data/Reference_merged_all.json"
55+
5156

5257
#===============================================================================
5358
# 2. 动态生成配置文件 (从yaml template生成yaml)
5459
#===============================================================================
5560
# 修改:配置文件生成路径,现在动态生成到 yaml 目录下
56-
CONFIG_TEMPLATE="tutorial/example_deep_finance/deep_finance.yaml"
61+
CONFIG_TEMPLATE="tutorial/example_deep_finance/yaml_template/deepfinance_template.yaml"
5762
CONFIG_FILE="${AJET_ROOT}/tutorial/example_deep_finance/yaml/${SUFFIX}.yaml"
5863
mkdir -p $(dirname ${CONFIG_FILE})
5964

@@ -64,10 +69,8 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
6469
-e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \
6570
-e "s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT}|g" \
6671
-e "s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT}|g" \
67-
-e "s|{{CGCV_WEIGHT}}|${CGCV_WEIGHT}|g" \
6872
-e "s|{{AUDIT_WEIGHT}}|${AUDIT_WEIGHT}|g" \
69-
-e "s|{{TRACEABILITY_WEIGHT}}|${TRACEABILITY_WEIGHT}|g" \
70-
-e "s|{{EBTU_WEIGHT}}|${EBTU_WEIGHT}|g" \
73+
-e "s|{{FINANCE_LLM}}|${FINANCE_LLM}|g" \
7174
-e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \
7275
-e "s|{{RM_LLM}}|${RM_LLM}|g" \
7376
-e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \
@@ -79,11 +82,19 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
7982
-e "s|{{TRAIN_REF_ANS_PATH}}|${TRAIN_REF_ANS_PATH}|g" \
8083
-e "s|{{VAL_REF_ANS_PATH}}|${VAL_REF_ANS_PATH}|g" \
8184
-e "s|{{CKPT_SAVE_PATH}}|${CKPT_SAVE_PATH}|g" \
85+
-e "s|{{MAX_MODEL_LEN}}|${MAX_MODEL_LEN}|g" \
8286
-e "s|{{ENV_SERVICE_URL}}|${ENV_SERVICE_URL}|g" \
8387
${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE}
8488

8589
echo "配置文件已生成: ${CONFIG_FILE}"
86-
echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, CGCV=${CGCV_WEIGHT}, Audit=${AUDIT_WEIGHT}, Traceability=${TRACEABILITY_WEIGHT}, EBTU=${EBTU_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
90+
echo "=== OpenJudge Finance 配置 ==="
91+
echo " Finance评估权重: ${RM_WEIGHT} (使用 FinanceCompositionEvaluator)"
92+
echo " Finance评估模型: ${FINANCE_LLM_DISPLAY}"
93+
echo " PresentationQuality: ${PRESENTATION_QUALITY_WEIGHT}"
94+
echo " Grounding: ${GROUNDING_WEIGHT}"
95+
echo " CGCV: ${CGCV_WEIGHT}"
96+
echo " Audit: ${AUDIT_WEIGHT}"
97+
echo " OpenJudge LLM: ${OPENJUDGE_LLM}"
8798

8899
#===============================================================================
89100
# 3. 环境配置
@@ -112,7 +123,7 @@ cat > ${DEEPFINANCE_MCP_CONFIG} << EOF
112123
}
113124
}
114125
EOF
115-
export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS
126+
export DEEPFINANCE_MCP_CONFIG DEEPFINANCE_TOOL_RESULT_MAX_CHARS
116127

117128
# 其他服务配置
118129
HF_ENDPOINT="https://hf-mirror.com"
@@ -121,16 +132,12 @@ export HF_ENDPOINT ES_HOSTS
121132

122133
# log 文件位置
123134
CURRENT_TIME=$(date "+%Y%m%d_%H%M%S")
124-
LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
125-
MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log"
126-
ENV_SERVICE_LOG="${LOG_DIR}/env_service_${SUFFIX}_${CURRENT_TIME}.log"
127-
TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log"
128135
env_log_prefix="${SUFFIX}__${CURRENT_TIME}"
136+
MASTER_IP_FILE="${LOG_DIR}/master-ip_${SUFFIX}.log"
137+
129138
# 多机训练参数配置
130-
GPUS_PER_NODE=8
131139
EXPECTED_WORKERS=$WORLD_SIZE
132140

133-
134141
#===============================================================================
135142
# 4. 工具函数 以及 NCCL 配置(固定)
136143
#===============================================================================
@@ -165,17 +172,22 @@ export NCCL_ASYNC_ERROR_HANDLING=1
165172
# 5. 工具envservice 环境变量
166173
#===============================================================================
167174

168-
export PYTHONPATH="${AJET_ROOT}:${PYTHONPATH}"
175+
export PYTHONPATH="${AJET_ROOT}:${OPENJUDGE_ROOT}:${PYTHONPATH}"
169176
export RAY_CLUSTER_MODE="multi_node"
170-
export DEEPFINANCE_PATH="${ENV_SERVICE_ROOT}" # AgentJet 内部可能使用此路径
177+
export DEEPFINANCE_PATH="${ENV_SERVICE_ROOT}"
171178
export DEEPFINANCE_SCRIPT="source /mnt/data/taoshuchang.tsc/anaconda3/etc/profile.d/conda.sh && conda activate finworld_1209 && cd ${ENV_SERVICE_ROOT} && DEEPFINANCE_TOOL_RESULT_MAX_CHARS=${DEEPFINANCE_TOOL_RESULT_MAX_CHARS} DEEPFINANCE_MCP_CONFIG=${DEEPFINANCE_MCP_CONFIG} CACHE_TYPE=${CACHE_TYPE} MONGO_URI=${MONGO_URI} MONGO_DB_NAME=${MONGO_DB_NAME} MONGO_COLLECTION_NAME=${MONGO_COLLECTION_NAME} python -m env_service.env_service --env finworld --portal 0.0.0.0 --port 8080"
179+
# 打印 PYTHONPATH 确认
180+
echo "=== PYTHONPATH 配置 ==="
181+
echo " AJET_ROOT: ${AJET_ROOT}"
182+
echo " OPENJUDGE_ROOT: ${OPENJUDGE_ROOT}"
172183

173184

174185
#===============================================================================
175186
# 6. 主流程
176187
#===============================================================================
177188
log "开始多机多卡训练: ${SUFFIX}"
178189
log "节点数: ${NNODES}, 每节点GPU数: ${GPUS_PER_NODE}"
190+
log "使用 OpenJudge FinanceCompositionEvaluator 进行 Finance 评估"
179191
mkdir -p ${LOG_DIR}
180192
mkdir -p $(dirname ${CONFIG_FILE})
181193

@@ -208,8 +220,10 @@ if [[ $HOSTNAME == *"-master-"* ]]; then
208220
export RAY_ADDRESS="ray://localhost:10001"
209221

210222
print_green "==================================="
211-
print_green "Training Configuration"
223+
print_green "OpenJudge Finance Training"
212224
print_green "Total GPUs: $((NNODES * GPUS_PER_NODE))"
225+
print_green "OpenJudge LLM: ${OPENJUDGE_LLM}"
226+
print_green "Finance Weight: ${RM_WEIGHT}"
213227
print_green "Log: ${TRAIN_LOG}"
214228
print_green "==================================="
215229

@@ -232,4 +246,4 @@ else
232246
ray stop || true
233247
ray start --address $MASTER_ADDR:6379 --num-gpus 8
234248
while true; do sleep 60; done
235-
fi
249+
fi

tutorial/example_deep_finance/deep_finance_judge.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
PresentationQualityGrader,
2020
GroundingGrader,
2121
AuditGrader,
22-
EBTUTraceabilityGrader,
2322
FinanceCompositionEvaluator,
2423
)
2524
# =============================================================================
@@ -107,7 +106,6 @@ def _setup_weights(self):
107106
"presentation_quality": getattr(cfg, "presentation_quality_weight", 0.25) if cfg else 0.25,
108107
"grounding": getattr(cfg, "grounding_weight", 0.0) if cfg else 0.0, # 引用规范性评估
109108
"audit": getattr(cfg, "audit_weight", 0.0) if cfg else 0.0, # 引用逻辑审计
110-
"ebtu": getattr(cfg, "ebtu_weight", 0.0) if cfg else 0.0, # EBTU证据优先可追溯性审计
111109
}
112110

113111
# 归一化(注意:action_loop 是惩罚项,不参与归一化;finance 需要参与归一化)
@@ -275,11 +273,6 @@ def extract_report_content(data: Dict) -> str:
275273
grader=AuditGrader(model=model),
276274
mapper=lambda data: {"traj": data},
277275
),
278-
# EBTU: Evidence-Backed Trace Units 证据优先可追溯性审计
279-
"ebtu": GraderConfig(
280-
grader=EBTUTraceabilityGrader(model=model),
281-
mapper=lambda data: {"traj": data},
282-
),
283276
}
284277

285278
def compute_reward(self, workflow_task: WorkflowTask, workflow_output: WorkflowOutput) -> Tuple[float, bool]:

0 commit comments

Comments
 (0)