11import abc
22import os
33import re
4- from typing import Dict , Any , List
4+ from typing import Dict , Any , List , Optional
55
66import yaml
77from jinja2 import Template
@@ -16,11 +16,20 @@ class BaseReviewer(abc.ABC):
1616
def __init__(self, prompt_key: str) -> None:
    """Create the LLM client and load the prompts selected by ``prompt_key``.

    :param prompt_key: Key handed to ``_load_prompts`` to pick the prompt entry.
    """
    self.client = Factory().getClient()
    # Only the key is passed; style and template path are left for
    # _load_prompts to resolve.
    self.prompts = self._load_prompts(prompt_key)
2020
21- def _load_prompts (self , prompt_key : str , style = "professional" ) -> Dict [str , Any ]:
21+ def _load_prompts (
22+ self , prompt_key : str , style : Optional [str ] = None , prompt_templates_file : Optional [str ] = None
23+ ) -> Dict [str , Any ]:
2224 """加载提示词配置"""
23- prompt_templates_file = "conf/prompt_templates.yml"
25+ if not style :
26+ # 如果未提供, 从环境变量中获取审查风格,默认为 "professional"
27+ style = os .getenv ("REVIEW_STYLE" , "professional" )
28+
29+ if not prompt_templates_file :
30+ # 如果未提供, 使用默认的提示词配置文件路径
31+ prompt_templates_file = "conf/prompt_templates.yml"
32+
2433 try :
2534 # 在打开 YAML 文件时显式指定编码为 UTF-8,避免使用系统默认的 GBK 编码。
2635 with open (prompt_templates_file , "r" , encoding = "utf-8" ) as file :
@@ -60,12 +69,13 @@ class CodeReviewer(BaseReviewer):
def __init__(self) -> None:
    """Initialise the reviewer with the ``code_review_prompt`` prompt entry."""
    super().__init__("code_review_prompt")
6271
63- def review_and_strip_code (self , changes_text : str , commits_text : str = "" ) -> str :
72+ def review_and_strip_code (self , changes_text : str , commits_text : str = "" , project_name : str = "" ) -> str :
6473 """
6574 Review判断changes_text超出取前REVIEW_MAX_TOKENS个token,超出则截断changes_text,
6675 调用review_code方法,返回review_result,如果review_result是markdown格式,则去掉头尾的```
67- :param changes_text:
68- :param commits_text:
76+ :param changes_text: 代码变更内容
77+ :param commits_text: 提交信息
78+ :param project_name: 项目名称
6979 :return:
7080 """
7181 # 如果超长,取前REVIEW_MAX_TOKENS个token
@@ -80,29 +90,152 @@ def review_and_strip_code(self, changes_text: str, commits_text: str = "") -> st
8090 if tokens_count > review_max_tokens :
8191 changes_text = truncate_text_by_tokens (changes_text , review_max_tokens )
8292
83- review_result = self .review_code (changes_text , commits_text ).strip ()
93+ review_result = self .review_code (changes_text , commits_text , project_name ).strip ()
8494 if review_result .startswith ("```markdown" ) and review_result .endswith ("```" ):
8595 return review_result [11 :- 3 ].strip ()
8696 return review_result
8797
def review_code(self, diffs_text: str, commits_text: str = "", project_name: str = "") -> str:
    """Review a diff with the LLM and return the result.

    When ``project_name`` is non-empty, the environment variable
    ``<PROJECT_NAME>_PROMPT`` (name upper-cased, ``-`` replaced by ``_``) is
    read. If it is set, its value is used as the prompt template file and the
    prompts are re-loaded on every call, which also lets project-level
    prompts be changed without a restart. Otherwise the prompts already
    stored in ``self.prompts`` are used.

    :param diffs_text: Text of the code changes to review.
    :param commits_text: Commit messages accompanying the changes.
    :param project_name: Project name used to locate a project-specific
        prompt file; empty or ``None`` means "no project override".
    :return: Whatever ``self.call_llm`` returns for the built messages.
    """
    project_prompts_path = None
    if project_name:
        # Guarded on purpose: an empty name would otherwise look up the
        # unrelated variable "_PROMPT", and None would fail on .upper().
        env_key = project_name.replace("-", "_").upper() + "_PROMPT"
        project_prompts_path = os.getenv(env_key)

    if project_prompts_path:
        prompts = self._load_prompts(
            prompt_key="code_review_prompt",
            prompt_templates_file=project_prompts_path,
        )
    else:
        prompts = self.prompts

    user_content = prompts["user_message"]["content"].format(
        diffs_text=diffs_text, commits_text=commits_text
    )
    messages = [
        prompts["system_message"],
        {"role": "user", "content": user_content},
    ]
    return self.call_llm(messages)
100117
def review_changes_in_batches(self, changes: List[Dict[str, Any]], commits_text: str = "", project_name: str = "") -> str:
    """Review code changes in batches of files, then merge the partial reviews.

    Behaviour is driven by environment variables:

    * ``BATCH_REVIEW_ENABLED`` (default ``"1"``): any other value falls back
      to a single ``review_and_strip_code`` call on ``str(changes)``.
    * ``BATCH_REVIEW_FILES_PER_BATCH`` (default ``1``): files per batch;
      values below 1 are treated as 1.
    * ``REVIEW_MAX_TOKENS`` (default ``10000``): each batch text is truncated
      to this many tokens before being sent for review.

    A batch whose review raises is recorded as a failure entry instead of
    aborting the remaining batches.

    :param changes: List of change dicts; ``new_path`` / ``old_path`` are read
        for the batch heading.
    :param commits_text: Commit messages accompanying the changes.
    :param project_name: Project name forwarded to the review/summary calls.
    :return: The single batch's review when there is only one batch,
        otherwise the result of ``_summarize_reviews`` over all batches.
    """
    if not changes:
        logger.info("代码变更为空")
        return "代码为空"

    if os.getenv("BATCH_REVIEW_ENABLED", "1") != "1":
        logger.info("批量审查功能未启用,使用传统一次性审查方式")
        return self.review_and_strip_code(str(changes), commits_text, project_name)

    review_max_tokens = int(os.getenv("REVIEW_MAX_TOKENS", 10000))
    # range() raises on a zero step and yields nothing on a negative one, so
    # a misconfigured batch size must not be used as-is.
    files_per_batch = max(1, int(os.getenv("BATCH_REVIEW_FILES_PER_BATCH", 1)))
    logger.info(f"批量审查已启用,每批次审查 {files_per_batch} 个文件")

    total_files = len(changes)
    total_batches = (total_files + files_per_batch - 1) // files_per_batch

    partial_reviews = []  # heading + body, as handed to the summary step
    batch_bodies = []     # body only, returned directly for a single batch

    batch_starts = range(0, total_files, files_per_batch)
    for batch_num, batch_start in enumerate(batch_starts, start=1):
        batch_end = min(batch_start + files_per_batch, total_files)
        batch_changes = changes[batch_start:batch_end]

        logger.info(f"正在审查第 {batch_num}/{total_batches} 批次 (文件 {batch_start + 1}-{batch_end}/{total_files})")

        # Fall back to 'unknown' even when a path key exists but holds None,
        # so the join in the heading below cannot fail.
        batch_file_paths = [
            str(change.get('new_path') or change.get('old_path') or 'unknown')
            for change in batch_changes
        ]

        batch_text = str(batch_changes)
        if count_tokens(batch_text) > review_max_tokens:
            logger.warning(f"批次 {batch_num} 的变更超过 {review_max_tokens} tokens,将截断")
            batch_text = truncate_text_by_tokens(batch_text, review_max_tokens)

        try:
            review_result = self.review_code(batch_text, commits_text, project_name).strip()
            # Unwrap a reply that is entirely fenced as ```markdown ... ```.
            if review_result.startswith("```markdown") and review_result.endswith("```"):
                review_result = review_result[11:-3].strip()

            batch_header = f"### 批次 {batch_num} (文件: {', '.join(batch_file_paths)})\n"
            partial_reviews.append(f"{batch_header}{review_result}")
            batch_bodies.append(review_result)
            logger.info(f"批次 {batch_num} 审查完成")
        except Exception as e:
            # Best effort: one failing batch must not discard the others.
            logger.error(f"审查批次 {batch_num} 时出错: {e}")
            failure_text = f"审查失败: {str(e)}"
            partial_reviews.append(f"### 批次 {batch_num}\n{failure_text}")
            batch_bodies.append(failure_text)

    # A single batch needs no merge step; return it without its heading.
    if len(batch_bodies) == 1:
        return batch_bodies[0]

    logger.info(f"开始汇总 {len(partial_reviews)} 个批次的审查结果")
    return self._summarize_reviews(partial_reviews, project_name)
195+
def _summarize_reviews(self, partial_reviews: List[str], project_name: str = "") -> str:
    """Merge per-batch reviews into one report via ``summary_merge_review_prompt``.

    When ``project_name`` is non-empty and the environment variable
    ``<PROJECT_NAME>_PROMPT`` (name upper-cased, ``-`` replaced by ``_``) is
    set, its value is used as the prompt template file. Otherwise the prompts
    are loaded with the style taken from ``REVIEW_STYLE`` (default
    ``"professional"``).

    :param partial_reviews: Review text of each batch.
    :param project_name: Project name used to locate a project-specific
        prompt file; empty or ``None`` means "no project override".
    :return: The merged review, with a surrounding ```markdown fence removed.
    """
    project_prompts_path = None
    if project_name:
        # Guarded on purpose: an empty name would otherwise look up the
        # unrelated variable "_PROMPT", and None would fail on .upper().
        env_key = project_name.replace("-", "_").upper() + "_PROMPT"
        project_prompts_path = os.getenv(env_key)

    if project_prompts_path:
        summary_prompts = self._load_prompts(
            prompt_key="summary_merge_review_prompt",
            prompt_templates_file=project_prompts_path,
        )
    else:
        summary_prompts = self._load_prompts(
            "summary_merge_review_prompt", os.getenv("REVIEW_STYLE", "professional")
        )

    # Batches are separated by a markdown horizontal rule.
    partial_reviews_text = "\n\n---\n\n".join(partial_reviews)

    user_content = summary_prompts["user_message"]["content"].format(
        partial_reviews_text=partial_reviews_text
    )
    messages = [
        summary_prompts["system_message"],
        {"role": "user", "content": user_content},
    ]

    summary_result = self.call_llm(messages).strip()
    # Unwrap a reply that is entirely fenced as ```markdown ... ```.
    if summary_result.startswith("```markdown") and summary_result.endswith("```"):
        summary_result = summary_result[11:-3].strip()

    logger.info("审查结果汇总完成")
    return summary_result
234+
101235 @staticmethod
102236 def parse_review_score (review_text : str ) -> int :
103237 """解析 AI 返回的 Review 结果,返回评分"""
104238 if not review_text :
105239 return 0
106240 match = re .search (r"总分[::]\s*(\d+)分?" , review_text )
107241 return int (match .group (1 )) if match else 0
108-
0 commit comments