Skip to content

Commit 294ad93

Browse files
committed
feat(review): surface similar implementation lookup
1 parent 96558cd commit 294ad93

File tree

4 files changed

+280
-25
lines changed

4 files changed

+280
-25
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
6464
31. [ ] Turn the current symbol graph into a persisted repository graph with durable storage and reload support.
6565
32. [x] Add caller/callee expansion APIs for multi-hop impact analysis from changed symbols.
6666
33. [x] Add contract edges between interfaces, implementations, and API endpoints.
67-
34. [ ] Add "similar implementation" lookup so repeated patterns and divergences are explicit.
67+
34. [x] Add "similar implementation" lookup so repeated patterns and divergences are explicit.
6868
35. [x] Add cross-file blast-radius summaries to findings when a change affects many callers.
6969
36. [x] Add graph freshness/version metadata so reviews know whether they are using stale repository intelligence.
7070
37. [x] Add graph-backed ranking of related files before semantic RAG retrieval.

src/core/context_provenance.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ pub enum ContextProvenance {
2121
},
2222
RelatedTestFile,
2323
ReverseDependencySummary,
24+
SimilarImplementation {
25+
similarity: f32,
26+
symbol_name: String,
27+
},
2428
SemanticRetrieval {
2529
similarity: f32,
2630
symbol_name: String,
@@ -56,6 +60,13 @@ impl ContextProvenance {
5660
}
5761
}
5862

63+
pub fn similar_implementation(similarity: f32, symbol_name: impl Into<String>) -> Self {
64+
Self::SimilarImplementation {
65+
similarity,
66+
symbol_name: symbol_name.into(),
67+
}
68+
}
69+
5970
pub fn symbol_graph_path(relation_path: Vec<String>, hops: usize, relevance: f32) -> Self {
6071
Self::SymbolGraphPath {
6172
relation_path,
@@ -69,6 +80,7 @@ impl ContextProvenance {
6980
Self::ActiveReviewRules => 120,
7081
Self::PatternRepositorySource { .. } => 40,
7182
Self::PatternRepositoryContext { .. } => 35,
83+
Self::SimilarImplementation { .. } => 30,
7284
Self::SemanticRetrieval { .. } => 25,
7385
Self::SymbolGraphPath {
7486
relation_path,
@@ -99,6 +111,7 @@ impl ContextProvenance {
99111
pub fn verification_bonus(&self) -> i32 {
100112
match self {
101113
Self::SymbolGraphPath { .. } => 80,
114+
Self::SimilarImplementation { .. } => 35,
102115
Self::SemanticRetrieval { .. } => 30,
103116
_ => 0,
104117
}
@@ -120,6 +133,12 @@ impl ContextProvenance {
120133
}
121134
Self::RelatedTestFile => "related test file".to_string(),
122135
Self::ReverseDependencySummary => "reverse dependency summary".to_string(),
136+
Self::SimilarImplementation {
137+
similarity,
138+
symbol_name,
139+
} => {
140+
format!("similar implementation (similarity={similarity:.2}, symbol={symbol_name})")
141+
}
123142
Self::SemanticRetrieval {
124143
similarity,
125144
symbol_name,
@@ -166,6 +185,14 @@ mod tests {
166185
);
167186
assert_eq!(provenance.ranking_bonus(), 75);
168187
assert_eq!(provenance.verification_bonus(), 80);
188+
189+
let similar = ContextProvenance::similar_implementation(0.91, "validate_user");
190+
assert_eq!(
191+
similar.to_string(),
192+
"similar implementation (similarity=0.91, symbol=validate_user)"
193+
);
194+
assert_eq!(similar.ranking_bonus(), 30);
195+
assert_eq!(similar.verification_bonus(), 35);
169196
}
170197

171198
#[test]

src/core/semantic.rs

Lines changed: 182 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -265,32 +265,38 @@ pub async fn semantic_context_for_diff(
265265
);
266266
let preferred_file_ranks = build_preferred_file_ranks(preferred_files);
267267

268-
let mut seen = HashSet::new();
269-
let mut chunks = Vec::new();
268+
build_semantic_context_chunks(matches, limit, &preferred_file_ranks)
269+
}
270+
271+
fn build_semantic_context_chunks(
272+
matches: Vec<SemanticMatch>,
273+
limit: usize,
274+
preferred_file_ranks: &HashMap<PathBuf, usize>,
275+
) -> Vec<LLMContextChunk> {
276+
if limit == 0 {
277+
return Vec::new();
278+
}
279+
280+
let similar_implementations = select_similar_implementation_matches(&matches, limit.min(2));
281+
let mut seen = similar_implementations
282+
.iter()
283+
.map(|semantic_match| semantic_match.chunk.key.clone())
284+
.collect::<HashSet<_>>();
285+
let mut chunks = similar_implementations
286+
.into_iter()
287+
.map(|semantic_match| {
288+
build_similar_implementation_chunk(semantic_match, preferred_file_ranks)
289+
})
290+
.collect::<Vec<_>>();
291+
270292
for semantic_match in matches {
271293
if !seen.insert(semantic_match.chunk.key.clone()) {
272294
continue;
273295
}
274-
let ranking_note = preferred_file_ranks
275-
.get(&semantic_match.chunk.file_path)
276-
.map(|rank| format!(", graph-ranked file #{}", rank + 1))
277-
.unwrap_or_default();
278-
let content = format!(
279-
"Semantic match (similarity {:.2}{})\nSymbol: {}\nSummary: {}\nCode:\n{}",
280-
semantic_match.similarity,
281-
ranking_note,
282-
semantic_match.chunk.symbol_name,
283-
semantic_match.chunk.summary,
284-
semantic_match.chunk.code_excerpt,
285-
);
286-
chunks.push(
287-
LLMContextChunk::reference(semantic_match.chunk.file_path.clone(), content)
288-
.with_line_range(semantic_match.chunk.line_range)
289-
.with_provenance(ContextProvenance::semantic_retrieval(
290-
semantic_match.similarity,
291-
semantic_match.chunk.symbol_name.clone(),
292-
)),
293-
);
296+
chunks.push(build_semantic_match_chunk(
297+
&semantic_match,
298+
preferred_file_ranks,
299+
));
294300
if chunks.len() >= limit {
295301
break;
296302
}
@@ -299,6 +305,87 @@ pub async fn semantic_context_for_diff(
299305
chunks
300306
}
301307

308+
/// Picks the matches to present as "similar implementations".
///
/// Walks `matches` in order and keeps the first match seen for each distinct
/// file path, stopping once `limit` matches have been selected. Returns an
/// empty vector when `limit` is 0.
fn select_similar_implementation_matches(
    matches: &[SemanticMatch],
    limit: usize,
) -> Vec<&SemanticMatch> {
    // Paths are borrowed from `matches`, so de-duplication needs no clones.
    let mut seen_files = HashSet::new();
    matches
        .iter()
        .filter(|semantic_match| seen_files.insert(semantic_match.chunk.file_path.as_path()))
        .take(limit)
        .collect()
}
332+
333+
fn build_semantic_match_chunk(
334+
semantic_match: &SemanticMatch,
335+
preferred_file_ranks: &HashMap<PathBuf, usize>,
336+
) -> LLMContextChunk {
337+
let ranking_note =
338+
graph_ranked_file_note(preferred_file_ranks, &semantic_match.chunk.file_path);
339+
let content = format!(
340+
"Semantic match (similarity {:.2}{})\nSymbol: {}\nSummary: {}\nCode:\n{}",
341+
semantic_match.similarity,
342+
ranking_note,
343+
semantic_match.chunk.symbol_name,
344+
semantic_match.chunk.summary,
345+
semantic_match.chunk.code_excerpt,
346+
);
347+
348+
LLMContextChunk::reference(semantic_match.chunk.file_path.clone(), content)
349+
.with_line_range(semantic_match.chunk.line_range)
350+
.with_provenance(ContextProvenance::semantic_retrieval(
351+
semantic_match.similarity,
352+
semantic_match.chunk.symbol_name.clone(),
353+
))
354+
}
355+
356+
fn build_similar_implementation_chunk(
357+
semantic_match: &SemanticMatch,
358+
preferred_file_ranks: &HashMap<PathBuf, usize>,
359+
) -> LLMContextChunk {
360+
let ranking_note =
361+
graph_ranked_file_note(preferred_file_ranks, &semantic_match.chunk.file_path);
362+
let content = format!(
363+
"Similar implementation (similarity {:.2}{})\nCompare this implementation for repeated patterns or divergences.\nSymbol: {}\nSummary: {}\nCode:\n{}",
364+
semantic_match.similarity,
365+
ranking_note,
366+
semantic_match.chunk.symbol_name,
367+
semantic_match.chunk.summary,
368+
semantic_match.chunk.code_excerpt,
369+
);
370+
371+
LLMContextChunk::reference(semantic_match.chunk.file_path.clone(), content)
372+
.with_line_range(semantic_match.chunk.line_range)
373+
.with_provenance(ContextProvenance::similar_implementation(
374+
semantic_match.similarity,
375+
semantic_match.chunk.symbol_name.clone(),
376+
))
377+
}
378+
379+
/// Returns the ", graph-ranked file #N" suffix for `file_path`.
///
/// `N` is the stored rank plus one. Files absent from `preferred_file_ranks`
/// produce an empty string.
fn graph_ranked_file_note(
    preferred_file_ranks: &HashMap<PathBuf, usize>,
    file_path: &Path,
) -> String {
    match preferred_file_ranks.get(file_path) {
        Some(rank) => format!(", graph-ranked file #{}", rank + 1),
        None => String::new(),
    }
}
388+
302389
#[allow(dead_code)]
303390
pub fn find_related_chunks(
304391
index: &SemanticIndex,
@@ -662,7 +749,11 @@ mod tests {
662749

663750
let chunks = semantic_context_for_diff(&index, &diff, None, None, 3, 0.1, &[]).await;
664751
assert_eq!(chunks.len(), 1);
665-
assert!(chunks[0].content.contains("Semantic match"));
752+
assert!(chunks[0].content.contains("Similar implementation"));
753+
assert!(matches!(
754+
chunks[0].provenance,
755+
Some(ContextProvenance::SimilarImplementation { .. })
756+
));
666757
}
667758

668759
#[test]
@@ -732,4 +823,72 @@ mod tests {
732823
assert_eq!(matches[0].chunk.file_path, PathBuf::from("src/graph.rs"));
733824
assert_eq!(matches[1].chunk.file_path, PathBuf::from("src/other.rs"));
734825
}
826+
827+
#[test]
fn build_semantic_context_chunks_highlights_similar_implementations_first() {
    // One row per match: (key, file, symbol, summary, code excerpt, content hash, similarity).
    // The embedding text of every fixture is its summary in lowercase.
    let fixtures = [
        (
            "src/auth_guard.rs:validate:1:5",
            "src/auth_guard.rs",
            "validate_admin",
            "Auth guard before a query",
            "fn validate_admin() {}",
            "guard",
            0.93_f32,
        ),
        (
            "src/member_guard.rs:validate:1:5",
            "src/member_guard.rs",
            "validate_member",
            "Member auth guard before a query",
            "fn validate_member() {}",
            "member",
            0.89_f32,
        ),
        (
            "src/sanitize.rs:sanitize:1:5",
            "src/sanitize.rs",
            "sanitize_name",
            "Sanitize a username before building a query",
            "fn sanitize_name() {}",
            "sanitize",
            0.82_f32,
        ),
    ];
    let matches = fixtures
        .iter()
        .map(|&(key, file, symbol, summary, excerpt, hash, similarity)| {
            let embedding_text = summary.to_lowercase();
            SemanticMatch {
                chunk: SemanticChunk {
                    key: key.to_string(),
                    file_path: PathBuf::from(file),
                    symbol_name: symbol.to_string(),
                    line_range: (1, 5),
                    summary: summary.to_string(),
                    embedding: local_hash_embedding(embedding_text.as_str()),
                    embedding_text,
                    code_excerpt: excerpt.to_string(),
                    content_hash: hash.to_string(),
                },
                similarity,
            }
        })
        .collect::<Vec<_>>();
    let preferred_file_ranks = build_preferred_file_ranks(&[PathBuf::from("src/auth_guard.rs")]);

    let chunks = build_semantic_context_chunks(matches, 3, &preferred_file_ranks);

    assert_eq!(chunks.len(), 3);

    // The two best matches come first, labelled as similar implementations;
    // the remaining match is a plain semantic match.
    assert!(chunks[0].content.contains("Similar implementation"));
    assert!(chunks[1].content.contains("Similar implementation"));
    assert!(chunks[2].content.contains("Semantic match"));

    assert!(matches!(
        chunks[0].provenance,
        Some(ContextProvenance::SimilarImplementation { .. })
    ));
    assert!(matches!(
        chunks[2].provenance,
        Some(ContextProvenance::SemanticRetrieval { .. })
    ));

    // The preferred file's rank is surfaced in the chunk header.
    assert!(chunks[0].content.contains("graph-ranked file #1"));
}
735894
}

0 commit comments

Comments
 (0)