@@ -265,32 +265,38 @@ pub async fn semantic_context_for_diff(
265265 ) ;
266266 let preferred_file_ranks = build_preferred_file_ranks ( preferred_files) ;
267267
268- let mut seen = HashSet :: new ( ) ;
269- let mut chunks = Vec :: new ( ) ;
268+ build_semantic_context_chunks ( matches, limit, & preferred_file_ranks)
269+ }
270+
271+ fn build_semantic_context_chunks (
272+ matches : Vec < SemanticMatch > ,
273+ limit : usize ,
274+ preferred_file_ranks : & HashMap < PathBuf , usize > ,
275+ ) -> Vec < LLMContextChunk > {
276+ if limit == 0 {
277+ return Vec :: new ( ) ;
278+ }
279+
280+ let similar_implementations = select_similar_implementation_matches ( & matches, limit. min ( 2 ) ) ;
281+ let mut seen = similar_implementations
282+ . iter ( )
283+ . map ( |semantic_match| semantic_match. chunk . key . clone ( ) )
284+ . collect :: < HashSet < _ > > ( ) ;
285+ let mut chunks = similar_implementations
286+ . into_iter ( )
287+ . map ( |semantic_match| {
288+ build_similar_implementation_chunk ( semantic_match, preferred_file_ranks)
289+ } )
290+ . collect :: < Vec < _ > > ( ) ;
291+
270292 for semantic_match in matches {
271293 if !seen. insert ( semantic_match. chunk . key . clone ( ) ) {
272294 continue ;
273295 }
274- let ranking_note = preferred_file_ranks
275- . get ( & semantic_match. chunk . file_path )
276- . map ( |rank| format ! ( ", graph-ranked file #{}" , rank + 1 ) )
277- . unwrap_or_default ( ) ;
278- let content = format ! (
279- "Semantic match (similarity {:.2}{})\n Symbol: {}\n Summary: {}\n Code:\n {}" ,
280- semantic_match. similarity,
281- ranking_note,
282- semantic_match. chunk. symbol_name,
283- semantic_match. chunk. summary,
284- semantic_match. chunk. code_excerpt,
285- ) ;
286- chunks. push (
287- LLMContextChunk :: reference ( semantic_match. chunk . file_path . clone ( ) , content)
288- . with_line_range ( semantic_match. chunk . line_range )
289- . with_provenance ( ContextProvenance :: semantic_retrieval (
290- semantic_match. similarity ,
291- semantic_match. chunk . symbol_name . clone ( ) ,
292- ) ) ,
293- ) ;
296+ chunks. push ( build_semantic_match_chunk (
297+ & semantic_match,
298+ preferred_file_ranks,
299+ ) ) ;
294300 if chunks. len ( ) >= limit {
295301 break ;
296302 }
@@ -299,6 +305,87 @@ pub async fn semantic_context_for_diff(
299305 chunks
300306}
301307
308+ fn select_similar_implementation_matches (
309+ matches : & [ SemanticMatch ] ,
310+ limit : usize ,
311+ ) -> Vec < & SemanticMatch > {
312+ if limit == 0 {
313+ return Vec :: new ( ) ;
314+ }
315+
316+ let mut seen_files = HashSet :: new ( ) ;
317+ let mut selected = Vec :: new ( ) ;
318+
319+ for semantic_match in matches {
320+ if !seen_files. insert ( semantic_match. chunk . file_path . clone ( ) ) {
321+ continue ;
322+ }
323+
324+ selected. push ( semantic_match) ;
325+ if selected. len ( ) >= limit {
326+ break ;
327+ }
328+ }
329+
330+ selected
331+ }
332+
333+ fn build_semantic_match_chunk (
334+ semantic_match : & SemanticMatch ,
335+ preferred_file_ranks : & HashMap < PathBuf , usize > ,
336+ ) -> LLMContextChunk {
337+ let ranking_note =
338+ graph_ranked_file_note ( preferred_file_ranks, & semantic_match. chunk . file_path ) ;
339+ let content = format ! (
340+ "Semantic match (similarity {:.2}{})\n Symbol: {}\n Summary: {}\n Code:\n {}" ,
341+ semantic_match. similarity,
342+ ranking_note,
343+ semantic_match. chunk. symbol_name,
344+ semantic_match. chunk. summary,
345+ semantic_match. chunk. code_excerpt,
346+ ) ;
347+
348+ LLMContextChunk :: reference ( semantic_match. chunk . file_path . clone ( ) , content)
349+ . with_line_range ( semantic_match. chunk . line_range )
350+ . with_provenance ( ContextProvenance :: semantic_retrieval (
351+ semantic_match. similarity ,
352+ semantic_match. chunk . symbol_name . clone ( ) ,
353+ ) )
354+ }
355+
356+ fn build_similar_implementation_chunk (
357+ semantic_match : & SemanticMatch ,
358+ preferred_file_ranks : & HashMap < PathBuf , usize > ,
359+ ) -> LLMContextChunk {
360+ let ranking_note =
361+ graph_ranked_file_note ( preferred_file_ranks, & semantic_match. chunk . file_path ) ;
362+ let content = format ! (
363+ "Similar implementation (similarity {:.2}{})\n Compare this implementation for repeated patterns or divergences.\n Symbol: {}\n Summary: {}\n Code:\n {}" ,
364+ semantic_match. similarity,
365+ ranking_note,
366+ semantic_match. chunk. symbol_name,
367+ semantic_match. chunk. summary,
368+ semantic_match. chunk. code_excerpt,
369+ ) ;
370+
371+ LLMContextChunk :: reference ( semantic_match. chunk . file_path . clone ( ) , content)
372+ . with_line_range ( semantic_match. chunk . line_range )
373+ . with_provenance ( ContextProvenance :: similar_implementation (
374+ semantic_match. similarity ,
375+ semantic_match. chunk . symbol_name . clone ( ) ,
376+ ) )
377+ }
378+
/// Returns the `", graph-ranked file #N"` suffix for `file_path`, where `N` is
/// the file's stored rank plus one, or an empty string when the file has no
/// entry in `preferred_file_ranks`.
fn graph_ranked_file_note(
    preferred_file_ranks: &HashMap<PathBuf, usize>,
    file_path: &Path,
) -> String {
    match preferred_file_ranks.get(file_path) {
        // Stored ranks are zero-based; the note shows them one-based.
        Some(rank) => format!(", graph-ranked file #{}", rank + 1),
        None => String::new(),
    }
}
388+
302389#[ allow( dead_code) ]
303390pub fn find_related_chunks (
304391 index : & SemanticIndex ,
@@ -662,7 +749,11 @@ mod tests {
662749
663750 let chunks = semantic_context_for_diff ( & index, & diff, None , None , 3 , 0.1 , & [ ] ) . await ;
664751 assert_eq ! ( chunks. len( ) , 1 ) ;
665- assert ! ( chunks[ 0 ] . content. contains( "Semantic match" ) ) ;
752+ assert ! ( chunks[ 0 ] . content. contains( "Similar implementation" ) ) ;
753+ assert ! ( matches!(
754+ chunks[ 0 ] . provenance,
755+ Some ( ContextProvenance :: SimilarImplementation { .. } )
756+ ) ) ;
666757 }
667758
668759 #[ test]
@@ -732,4 +823,72 @@ mod tests {
732823 assert_eq ! ( matches[ 0 ] . chunk. file_path, PathBuf :: from( "src/graph.rs" ) ) ;
733824 assert_eq ! ( matches[ 1 ] . chunk. file_path, PathBuf :: from( "src/other.rs" ) ) ;
734825 }
826+
#[test]
fn build_semantic_context_chunks_highlights_similar_implementations_first() {
    // Three matches from three distinct files, listed by descending similarity.
    let admin_guard = SemanticMatch {
        chunk: SemanticChunk {
            key: "src/auth_guard.rs:validate:1:5".to_string(),
            file_path: PathBuf::from("src/auth_guard.rs"),
            symbol_name: "validate_admin".to_string(),
            line_range: (1, 5),
            summary: "Auth guard before a query".to_string(),
            embedding_text: "auth guard before a query".to_string(),
            code_excerpt: "fn validate_admin() {}".to_string(),
            embedding: local_hash_embedding("auth guard before a query"),
            content_hash: "guard".to_string(),
        },
        similarity: 0.93,
    };
    let member_guard = SemanticMatch {
        chunk: SemanticChunk {
            key: "src/member_guard.rs:validate:1:5".to_string(),
            file_path: PathBuf::from("src/member_guard.rs"),
            symbol_name: "validate_member".to_string(),
            line_range: (1, 5),
            summary: "Member auth guard before a query".to_string(),
            embedding_text: "member auth guard before a query".to_string(),
            code_excerpt: "fn validate_member() {}".to_string(),
            embedding: local_hash_embedding("member auth guard before a query"),
            content_hash: "member".to_string(),
        },
        similarity: 0.89,
    };
    let sanitizer = SemanticMatch {
        chunk: SemanticChunk {
            key: "src/sanitize.rs:sanitize:1:5".to_string(),
            file_path: PathBuf::from("src/sanitize.rs"),
            symbol_name: "sanitize_name".to_string(),
            line_range: (1, 5),
            summary: "Sanitize a username before building a query".to_string(),
            embedding_text: "sanitize a username before building a query".to_string(),
            code_excerpt: "fn sanitize_name() {}".to_string(),
            embedding: local_hash_embedding("sanitize a username before building a query"),
            content_hash: "sanitize".to_string(),
        },
        similarity: 0.82,
    };
    let preferred_file_ranks = build_preferred_file_ranks(&[PathBuf::from("src/auth_guard.rs")]);

    let chunks = build_semantic_context_chunks(
        vec![admin_guard, member_guard, sanitizer],
        3,
        &preferred_file_ranks,
    );

    // The first two chunks are highlighted as similar implementations; the
    // third falls back to a plain semantic match.
    assert_eq!(chunks.len(), 3);
    let expected_labels = [
        "Similar implementation",
        "Similar implementation",
        "Semantic match",
    ];
    for (chunk, &label) in chunks.iter().zip(expected_labels.iter()) {
        assert!(chunk.content.contains(label));
    }

    assert!(matches!(
        chunks[0].provenance,
        Some(ContextProvenance::SimilarImplementation { .. })
    ));
    assert!(matches!(
        chunks[2].provenance,
        Some(ContextProvenance::SemanticRetrieval { .. })
    ));
    // The preferred file is noted with its one-based graph rank.
    assert!(chunks[0].content.contains("graph-ranked file #1"));
}
735894}
0 commit comments