@@ -4,7 +4,6 @@ use once_cell::sync::Lazy;
44use regex:: Regex ;
55use std:: collections:: HashMap ;
66use std:: path:: Path ;
7- #[ cfg( test) ]
87use std:: path:: PathBuf ;
98use std:: time:: Instant ;
109use tracing:: { info, warn} ;
@@ -14,6 +13,7 @@ use std::sync::Arc;
1413use crate :: adapters;
1514use crate :: config;
1615use crate :: core;
16+ use crate :: core:: convention_learner:: ConventionStore ;
1717use crate :: core:: offline:: optimize_prompt_for_local;
1818use crate :: output:: OutputFormat ;
1919use crate :: parsing:: parse_llm_response;
@@ -135,9 +135,24 @@ async fn review_diff_content_raw_inner(
135135 }
136136 }
137137
138- // Build enhanced review context from the new modules
139- let mut enhanced_ctx =
140- core:: build_enhanced_context ( & diffs, & HashMap :: new ( ) , None , None , None , None ) ;
138+ // Gather git history for enhanced context
139+ let git_log_output = gather_git_log ( repo_path) ;
140+
141+ // Load convention store for learned review patterns
142+ let convention_store_path = resolve_convention_store_path ( & config) ;
143+ let convention_json = convention_store_path
144+ . as_ref ( )
145+ . and_then ( |p| std:: fs:: read_to_string ( p) . ok ( ) ) ;
146+
147+ // Build enhanced review context with real data from the repository
148+ let mut enhanced_ctx = core:: build_enhanced_context (
149+ & diffs,
150+ & HashMap :: new ( ) ,
151+ git_log_output. as_deref ( ) ,
152+ None ,
153+ convention_json. as_deref ( ) ,
154+ None ,
155+ ) ;
141156 let enhanced_guidance = core:: generate_enhanced_guidance ( & enhanced_ctx, "rs" ) ;
142157 if !enhanced_guidance. is_empty ( ) {
143158 info ! (
@@ -268,6 +283,12 @@ async fn review_diff_content_raw_inner(
268283 }
269284 }
270285
286+ // Include related files: reverse dependencies (callers) and test files
287+ if let Some ( ref index) = symbol_index {
288+ let caller_chunks = gather_related_file_context ( index, & diff. file_path , repo_path) ;
289+ context_chunks. extend ( caller_chunks) ;
290+ }
291+
271292 // Get path-specific configuration
272293 let path_config = config. get_path_config ( & diff. file_path ) . cloned ( ) ;
273294
@@ -569,6 +590,15 @@ async fn review_diff_content_raw_inner(
569590 // Apply enhanced filters from convention learning and composable pipeline
570591 let processed_comments = core:: apply_enhanced_filters ( & mut enhanced_ctx, processed_comments) ;
571592
593+ // Apply convention-based suppression: filter out comments matching suppression patterns
594+ let processed_comments =
595+ apply_convention_suppression ( processed_comments, & enhanced_ctx. convention_store ) ;
596+
597+ // Save updated convention store back to disk
598+ if let Some ( ref store_path) = convention_store_path {
599+ save_convention_store ( & enhanced_ctx. convention_store , store_path) ;
600+ }
601+
572602 Ok ( processed_comments)
573603}
574604
@@ -993,6 +1023,181 @@ fn should_optimize_for_local(config: &config::Config) -> bool {
9931023 config. is_local_endpoint ( )
9941024}
9951025
1026+ /// Run `git log --numstat` against repo_path to gather commit history.
1027+ /// Returns None if the command fails (e.g., not a git repo).
1028+ fn gather_git_log ( repo_path : & Path ) -> Option < String > {
1029+ let output = std:: process:: Command :: new ( "git" )
1030+ . args ( [
1031+ "log" ,
1032+ "--numstat" ,
1033+ "--format=commit %H%nAuthor: %an <%ae>%nDate: %ai%n%n %s%n" ,
1034+ "-100" ,
1035+ ] )
1036+ . current_dir ( repo_path)
1037+ . output ( ) ;
1038+ match output {
1039+ Ok ( out) if out. status . success ( ) => {
1040+ let log_text = String :: from_utf8_lossy ( & out. stdout ) . to_string ( ) ;
1041+ if log_text. trim ( ) . is_empty ( ) {
1042+ None
1043+ } else {
1044+ info ! ( "Gathered git log ({} bytes)" , log_text. len( ) ) ;
1045+ Some ( log_text)
1046+ }
1047+ }
1048+ _ => {
1049+ info ! ( "Git log unavailable (not a git repo or git not found)" ) ;
1050+ None
1051+ }
1052+ }
1053+ }
1054+
1055+ /// Resolve the convention store path from config or default location.
1056+ fn resolve_convention_store_path ( config : & config:: Config ) -> Option < PathBuf > {
1057+ if let Some ( ref path) = config. convention_store_path {
1058+ return Some ( PathBuf :: from ( path) ) ;
1059+ }
1060+ // Default: ~/.local/share/diffscope/conventions.json
1061+ dirs:: data_local_dir ( ) . map ( |d| d. join ( "diffscope" ) . join ( "conventions.json" ) )
1062+ }
1063+
1064+ /// Save the convention store to the given path, creating directories if needed.
1065+ fn save_convention_store ( store : & ConventionStore , path : & PathBuf ) {
1066+ if let Ok ( json) = store. to_json ( ) {
1067+ if let Some ( parent) = path. parent ( ) {
1068+ let _ = std:: fs:: create_dir_all ( parent) ;
1069+ }
1070+ if let Err ( e) = std:: fs:: write ( path, json) {
1071+ warn ! (
1072+ "Failed to save convention store to {}: {}" ,
1073+ path. display( ) ,
1074+ e
1075+ ) ;
1076+ }
1077+ }
1078+ }
1079+
1080+ /// Gather related file context: reverse dependencies (callers) and test files.
1081+ /// These are included as Reference context chunks for the LLM.
1082+ fn gather_related_file_context (
1083+ index : & core:: SymbolIndex ,
1084+ file_path : & Path ,
1085+ repo_path : & Path ,
1086+ ) -> Vec < core:: LLMContextChunk > {
1087+ let mut chunks: Vec < core:: LLMContextChunk > = Vec :: new ( ) ;
1088+
1089+ // 1. Reverse dependencies (files that import/depend on this file)
1090+ let callers = index. reverse_deps ( file_path) ;
1091+ for caller_path in callers. iter ( ) . take ( 3 ) {
1092+ if let Some ( summary) = index. file_summary ( caller_path) {
1093+ let truncated: String = if summary. len ( ) > 2000 {
1094+ format ! ( "{}...[truncated]" , & summary[ ..2000 ] )
1095+ } else {
1096+ summary. to_string ( )
1097+ } ;
1098+ chunks. push ( core:: LLMContextChunk {
1099+ file_path : caller_path. clone ( ) ,
1100+ content : format ! ( "[Caller/dependent file]\n {}" , truncated) ,
1101+ context_type : core:: ContextType :: Reference ,
1102+ line_range : None ,
1103+ } ) ;
1104+ }
1105+ }
1106+
1107+ // 2. Look for matching test files
1108+ let test_files = find_test_files ( file_path, repo_path) ;
1109+ for test_path in test_files. iter ( ) . take ( 2 ) {
1110+ let relative: & Path = test_path. strip_prefix ( repo_path) . unwrap_or ( test_path) ;
1111+ // Read first 60 lines of the test file for context
1112+ if let Ok ( content) = std:: fs:: read_to_string ( test_path) {
1113+ let snippet: String = content. lines ( ) . take ( 60 ) . collect :: < Vec < _ > > ( ) . join ( "\n " ) ;
1114+ if !snippet. is_empty ( ) {
1115+ chunks. push ( core:: LLMContextChunk {
1116+ file_path : relative. to_path_buf ( ) ,
1117+ content : format ! ( "[Test file]\n {}" , snippet) ,
1118+ context_type : core:: ContextType :: Reference ,
1119+ line_range : None ,
1120+ } ) ;
1121+ }
1122+ }
1123+ }
1124+
1125+ chunks
1126+ }
1127+
/// Find test files that correspond to a given source file.
/// Checked patterns, in order: test_<stem>, <stem>_test, <stem>.test,
/// <stem>.spec (same directory), then tests/<stem> and __tests__/<stem>.
/// Only candidates that exist as regular files are returned.
fn find_test_files(file_path: &Path, repo_path: &Path) -> Vec<PathBuf> {
    let stem = match file_path.file_stem().and_then(|s| s.to_str()) {
        Some(s) => s,
        None => return Vec::new(),
    };
    let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
    // Anchor every candidate at the source file's directory inside the repo.
    let dir = repo_path.join(file_path.parent().unwrap_or(Path::new("")));

    // Sibling-file naming conventions (python/go/js style).
    let sibling_names = [
        format!("test_{}.{}", stem, ext),
        format!("{}_test.{}", stem, ext),
        format!("{}.test.{}", stem, ext),
        format!("{}.spec.{}", stem, ext),
    ];
    let mut candidates: Vec<PathBuf> = sibling_names.iter().map(|name| dir.join(name)).collect();
    // Conventional test subdirectories.
    candidates.push(dir.join("tests").join(format!("{}.{}", stem, ext)));
    candidates.push(dir.join("__tests__").join(format!("{}.{}", stem, ext)));

    candidates.retain(|p| p.is_file());
    candidates
}
1166+
1167+ /// Apply convention-based suppression: filter out comments whose content
1168+ /// matches learned suppression patterns with high confidence.
1169+ fn apply_convention_suppression (
1170+ comments : Vec < core:: Comment > ,
1171+ convention_store : & ConventionStore ,
1172+ ) -> Vec < core:: Comment > {
1173+ let suppression_patterns = convention_store. suppression_patterns ( ) ;
1174+ if suppression_patterns. is_empty ( ) {
1175+ return comments;
1176+ }
1177+
1178+ let before_count = comments. len ( ) ;
1179+ let filtered: Vec < core:: Comment > = comments
1180+ . into_iter ( )
1181+ . filter ( |comment| {
1182+ let category_str = comment. category . to_string ( ) ;
1183+ let score = convention_store. score_comment ( & comment. content , & category_str) ;
1184+ // Only suppress if the convention store strongly indicates suppression
1185+ // (score <= -0.25 means the team has consistently rejected similar comments)
1186+ score > -0.25
1187+ } )
1188+ . collect ( ) ;
1189+
1190+ let suppressed = before_count. saturating_sub ( filtered. len ( ) ) ;
1191+ if suppressed > 0 {
1192+ info ! (
1193+ "Convention learning suppressed {} comment(s) based on team feedback patterns" ,
1194+ suppressed
1195+ ) ;
1196+ }
1197+
1198+ filtered
1199+ }
1200+
9961201#[ cfg( test) ]
9971202mod tests {
9981203 use super :: * ;
0 commit comments