@@ -15,6 +15,10 @@ JUDGE_CONCURRENCY=10
1515RM_WEIGHT=0.5
1616PRESENTATION_QUALITY_WEIGHT=0.25
1717GROUNDING_WEIGHT=0.25
18+ CGCV_WEIGHT=0.0 # 不使用 CGCV,设为 0
19+ AUDIT_WEIGHT=0.0 # 不使用 Audit,设为 0
20+ TRACEABILITY_WEIGHT=0.0 # 不使用 Traceability,设为 0
21+ EBTU_WEIGHT=0.0 # 不使用 EBTU,设为 0
1822
1923# 训练参数配置
2024NUM_REPEAT=4 # group size,每个query rollout NUM_REPEAT次
@@ -28,7 +32,13 @@ ENV_SERVICE_URL="http://127.0.0.1:8080" # 环境服务地址
2832# 主目录(需要更改)
2933export AJET_ROOT="/mnt/data_cpfs/taoshuchang.tsc/deepresearch/AgentJet_new"
3034
31- NNODES=${WORLD_SIZE}
35+ # 单机调试配置(默认值)
36+ NNODES=${WORLD_SIZE:-1}
37+ GPUS_PER_NODE=8
38+ CURRENT_TIME=$(date "+%Y%m%d_%H%M%S")
39+ LOG_DIR="${AJET_ROOT}/logs/${PREFIX}"
40+ TRAIN_LOG="${LOG_DIR}/train_${SUFFIX}_${CURRENT_TIME}.log"
41+ mkdir -p ${LOG_DIR}
3242
3343# 涉密的配置(API_KEY以及模型、数据位置)从.env读取
3444cd ${AJET_ROOT}
4555 echo -e "\033[31m警告: 找不到 .env 文件: $ENV_FILE\033[0m"
4656fi
4757
58+ export MODEL_PATH="/mnt/data_cpfs/taoshuchang.tsc/models/Qwen3-8B"
59+
60+
4861# ===============================================================================
4962# 2. 动态生成配置文件 (从yaml template生成yaml)
5063# ===============================================================================
@@ -60,6 +73,10 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
6073 -e "s|{{RM_WEIGHT}}|${RM_WEIGHT}|g" \
6174 -e "s|{{PRESENTATION_QUALITY_WEIGHT}}|${PRESENTATION_QUALITY_WEIGHT}|g" \
6275 -e "s|{{GROUNDING_WEIGHT}}|${GROUNDING_WEIGHT}|g" \
76+ -e "s|{{CGCV_WEIGHT}}|${CGCV_WEIGHT}|g" \
77+ -e "s|{{AUDIT_WEIGHT}}|${AUDIT_WEIGHT}|g" \
78+ -e "s|{{TRACEABILITY_WEIGHT}}|${TRACEABILITY_WEIGHT}|g" \
79+ -e "s|{{EBTU_WEIGHT}}|${EBTU_WEIGHT}|g" \
6380 -e "s|{{OPENJUDGE_LLM}}|${OPENJUDGE_LLM}|g" \
6481 -e "s|{{RM_LLM}}|${RM_LLM}|g" \
6582 -e "s|{{JUDGE_CONCURRENCY}}|${JUDGE_CONCURRENCY}|g" \
@@ -75,7 +92,7 @@ sed -e "s|{{SUFFIX}}|${SUFFIX}|g" \
7592 ${AJET_ROOT}/${CONFIG_TEMPLATE} > ${CONFIG_FILE}
7693
7794echo "配置文件已生成: ${CONFIG_FILE}"
78- echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
95+ echo "参数确认: RM=${RM_WEIGHT}, PresentationQuality=${PRESENTATION_QUALITY_WEIGHT}, Grounding=${GROUNDING_WEIGHT}, CGCV=${CGCV_WEIGHT}, Audit=${AUDIT_WEIGHT}, Traceability=${TRACEABILITY_WEIGHT}, EBTU=${EBTU_WEIGHT}, OpenJudge=${OPENJUDGE_LLM}, RM_LLM=${RM_LLM}"
7996
8097
8198# ===============================================================================
@@ -119,15 +136,16 @@ export RAY_CLUSTER_MODE="multi_node"
119136# ===============================================================================
120137# 6. 主流程
121138# ===============================================================================
122- log "节点数: ${NNODES}, 每节点GPU数: ${GPUS_PER_NODE}"
123- mkdir -p ${LOG_DIR}
124- mkdir -p $( dirname ${CONFIG_FILE} )
139+ log "单机调试模式: NNODES=${NNODES}, GPUS_PER_NODE=${GPUS_PER_NODE}"
125140
126141# ===============================================================================
127142# 6.1 Master 节点启动流程
128143# ===============================================================================
129144# 启动训练任务(最核心)
145+ # 请注意只有单节点需要--with-ray 多节点应该删除
130146python ajet/launcher.py \
131147 --conf ${CONFIG_FILE} \
148+ --with-deepfinance \
149+ --with-ray \
132150 --backbone="debug" \
133151 2>&1 | tee ${TRAIN_LOG}
0 commit comments