88import time
99import logging
1010from datetime import datetime
11- from typing import Dict , Any , Optional , Tuple , List , Type
11+ from typing import Dict , Any , Optional , Tuple , List
1212
1313from ajet .task_judge .base_judge import BaseJudge
1414from ajet .workflow import WorkflowOutput , WorkflowTask
1515
1616from openjudge .models .openai_chat_model import OpenAIChatModel
1717from openjudge .runner .grading_runner import GraderConfig , GradingRunner
18- from openjudge .graders .base_grader import BaseGrader
19- from tutorial .example_deep_finance .judge import PresentationQualityGrader , GroundingGrader , AuditGrader , EBTUTraceabilityGrader
20-
21- # Finance Graders from OpenJudge cookbooks
22- from cookbooks .finance_grader .stock_analysis .valuation_analysis import ValuationAnalysisGrader
23- from cookbooks .finance_grader .stock_analysis .fundamental_analysis import FundamentalAnalysisGrader
24- from cookbooks .finance_grader .stock_analysis .overall_logic import OverallLogicGrader
25- from cookbooks .finance_grader .stock_analysis .stock_risk_analysis import StockRiskAnalysisGrader
26- from cookbooks .finance_grader .macro_analysis .macro_analysis import MacroAnalysisGrader
27- from cookbooks .finance_grader .macro_analysis .concept_explanation import ConceptExplanationGrader
28- from cookbooks .finance_grader .industry_research .characteristics_analysis import CharacteristicsAnalysisGrader
29- from cookbooks .finance_grader .industry_research .risk_analysis import RiskAnalysisGrader
30- from cookbooks .finance_grader .industry_research .underlying_comparison import UnderlyingComparisonGrader
31- from cookbooks .finance_grader .event_interpretation .event_analysis import EventAnalysisGrader
32- from cookbooks .finance_grader .event_interpretation .event_identification import EventIdentificationGrader
33- from cookbooks .finance_grader .stock_search .search_relevance import SearchRelevanceGrader
34- from cookbooks .finance_grader .stock_search .search_integrity import SearchIntegrityGrader
35- from cookbooks .finance_grader .stock_search .search_timeliness import SearchTimelinessGrader
36-
37-
38- # OpenJudge imports
18+ from tutorial .example_deep_finance .judge import (
19+ PresentationQualityGrader ,
20+ GroundingGrader ,
21+ AuditGrader ,
22+ EBTUTraceabilityGrader ,
23+ FinanceCompositionEvaluator ,
24+ )
3925# =============================================================================
4026# 全局辅助函数
4127# =============================================================================
@@ -76,135 +62,6 @@ def load_reference_answers_from_file(file_path: str) -> Tuple[Dict[str, str], Di
7662 raise ValueError (f"Error loading reference answers: { e } " )
7763
7864
79- # =============================================================================
80- # FinanceCompositionEvaluator - 基于 OpenJudge 的 Finance 评估器
81- # =============================================================================
82-
class FinanceCompositionEvaluator:
    """Domain-routed pairwise finance evaluator built on OpenJudge graders.

    Replaces ``rm_gallery.FinanceComposition``. For a given ``domain`` the
    evaluator instantiates the grader classes listed in ``DOMAIN_GRADERS``,
    asks each grader to compare the training answer against the reference
    answer, and averages the per-grader outcomes into one score in [0, 1].

    Supported domains:
        - ``stock_analysis``
        - ``industry_research``
        - ``macro_analysis``
        - ``event_interpretation``
        - ``stock_search``
    """

    # Score returned whenever a comparison cannot be decided (unknown domain,
    # grader error, or a result that carries no usable ranking).
    _NEUTRAL_SCORE = 0.5

    # Domain -> grader classes (kept aligned with the RM-Gallery routing).
    # Commented-out entries are graders that are currently disabled.
    DOMAIN_GRADERS: Dict[str, List[Type[BaseGrader]]] = {
        "stock_analysis": [
            ValuationAnalysisGrader,
            # FundamentalAnalysisGrader,
            # OverallLogicGrader,
            # StockRiskAnalysisGrader,
        ],
        "industry_research": [
            CharacteristicsAnalysisGrader,
            # RiskAnalysisGrader,
            # UnderlyingComparisonGrader,
        ],
        "macro_analysis": [
            MacroAnalysisGrader,
            # ConceptExplanationGrader,
        ],
        "event_interpretation": [
            EventAnalysisGrader,
            # EventIdentificationGrader,
        ],
        "stock_search": [
            SearchRelevanceGrader,
            # SearchIntegrityGrader,
            # SearchTimelinessGrader,
        ],
    }

    def __init__(self, model: OpenAIChatModel, params: Optional[Dict[str, Any]] = None):
        """Store the judge model and prepare the per-domain grader cache.

        Args:
            model: Chat model handed to every grader that gets instantiated.
            params: Extra options, kept for backward compatibility; stored
                on ``self.params`` (an empty dict when omitted).
        """
        self.model = model
        self.params = params or {}
        # Grader instances are built lazily, once per domain.
        self._grader_cache: Dict[str, List[BaseGrader]] = {}

    def _get_graders_for_domain(self, domain: str) -> List[BaseGrader]:
        """Return the (cached) grader instances for ``domain``.

        Domains missing from ``DOMAIN_GRADERS`` yield an empty list, which is
        cached as well.
        """
        if domain not in self._grader_cache:
            grader_classes = self.DOMAIN_GRADERS.get(domain, [])
            self._grader_cache[domain] = [
                grader_cls(model=self.model) for grader_cls in grader_classes
            ]
        return self._grader_cache[domain]

    @staticmethod
    def _score_from_result(result: Any) -> float:
        """Convert one grader result into a score.

        A result is usable only when it exposes a non-empty list ``rank``:
            - ``rank[0] == 1`` (e.g. ``[1, 2]``): answer_1 (current) wins -> 1.0
            - anything else (e.g. ``[2, 1]``): answer_2 (reference) wins -> 0.0
        Every other shape, including an empty list, is undecidable -> 0.5.
        """
        rank = getattr(result, "rank", None)
        if not isinstance(rank, list) or not rank:
            return FinanceCompositionEvaluator._NEUTRAL_SCORE
        return 1.0 if rank[0] == 1 else 0.0

    async def _score_with_grader(
        self, grader: BaseGrader, query: str, current: str, reference: str
    ) -> float:
        """Run a single grader and return its score, or 0.5 if it fails."""
        try:
            result = await grader.aevaluate(
                query=query,
                answer_1=current,    # training model output
                answer_2=reference,  # reference answer
            )
            return self._score_from_result(result)
        except Exception as e:
            # Best effort by design: one failing grader must not abort the
            # evaluation, it only contributes a neutral score.
            grader_name = getattr(grader, "name", grader.__class__.__name__)
            print(f"⚠️ FinanceCompositionEvaluator: Grader {grader_name} failed: {e}")
            return self._NEUTRAL_SCORE

    async def aevaluate(self, query: str, current: str, reference: str, domain: str) -> float:
        """Pairwise-evaluate ``current`` against ``reference`` (async).

        Being a coroutine, it runs on the caller's event loop instead of
        creating a new one.

        Args:
            query: The user query.
            current: Answer produced by the model being trained.
            reference: Reference answer to compare against.
            domain: Task domain, used to select the graders.

        Returns:
            The mean of the per-grader scores, in [0, 1]:
                - 1.0: ``current`` is better than ``reference``
                - 0.0: ``reference`` is better than ``current``
                - 0.5: undecidable, grader error, or unknown/empty domain
        """
        if not domain or domain not in self.DOMAIN_GRADERS:
            print(f"⚠️ FinanceCompositionEvaluator: Unknown domain '{domain}', returning 0.5")
            return self._NEUTRAL_SCORE

        graders = self._get_graders_for_domain(domain)
        if not graders:
            print(f"⚠️ FinanceCompositionEvaluator: No graders for domain '{domain}', returning 0.5")
            return self._NEUTRAL_SCORE

        # Graders are awaited one after another, in declaration order.
        scores = [
            await self._score_with_grader(grader, query, current, reference)
            for grader in graders
        ]
        # ``graders`` is non-empty here, so ``scores`` is too.
        return sum(scores) / len(scores)
206-
207-
20865# =============================================================================
20966# DeepFinanceJudgeByOpenJudge 类
21067# =============================================================================
@@ -287,6 +144,7 @@ def _init_finance_evaluator(self):
287144 初始化 FinanceCompositionEvaluator(仅当 finance_weight > 0 时)
288145
289146 使用 OpenJudge 的 finance graders 替代原 rm_gallery 实现
147+ 支持独立的 finance_llm 配置,若未配置则复用 openjudge_llm
290148 """
291149 self ._finance_enabled = (self .w .get ("finance" , 0 ) > 0 )
292150 if self ._finance_enabled :
@@ -302,15 +160,35 @@ def _create_finance_evaluator(self):
302160 """
303161 创建 FinanceCompositionEvaluator 实例(基于 OpenJudge)
304162
305- 复用已初始化的 OpenJudge model,无需单独配置
163+ 支持独立的 finance_llm 配置:
164+ - 若 config.ajet.judge.finance_llm 有值,则使用独立的 model
165+ - 若未配置或为空,则复用已初始化的 OpenJudge model
306166 """
307167 try :
308- # 复用 OpenJudge model(已在 _init_openjudge_model 中初始化)
168+ # 检查是否配置了独立的 finance_llm
169+ finance_llm_name = getattr (self .config .ajet .judge , "finance_llm" , None )
170+
171+ if finance_llm_name and finance_llm_name .strip ():
172+ # 使用独立的 finance model
173+ finance_base_url = os .environ .get ("FINANCE_BASE_URL" ) or os .environ .get ("OPENJUDGE_BASE_URL" )
174+ finance_api_key = os .environ .get ("FINANCE_API_KEY" ) or os .environ .get ("OPENJUDGE_API_KEY" )
175+
176+ finance_model = OpenAIChatModel (
177+ model = finance_llm_name ,
178+ base_url = finance_base_url ,
179+ api_key = finance_api_key ,
180+ )
181+ print (f"[Init FinanceCompositionEvaluator] Using dedicated finance model: { finance_llm_name } " )
182+ else :
183+ # 复用 OpenJudge model(已在 _init_openjudge_model 中初始化)
184+ finance_model = self .model
185+ print (f"[Init FinanceCompositionEvaluator] Reusing OpenJudge model" )
186+
309187 self .finance_evaluator = FinanceCompositionEvaluator (
310- model = self . model ,
188+ model = finance_model ,
311189 params = {"is_parallel" : True }
312190 )
313- print (f"[Init FinanceCompositionEvaluator] Using OpenJudge model, domains={ list (FinanceCompositionEvaluator .DOMAIN_GRADERS .keys ())} " )
191+ print (f"[Init FinanceCompositionEvaluator] domains={ list (FinanceCompositionEvaluator .DOMAIN_GRADERS .keys ())} " )
314192 except Exception as e :
315193 print (f"✗ Failed to initialize FinanceCompositionEvaluator: { e } " )
316194 import traceback
0 commit comments