Skip to content

Commit 923c3aa

Browse files
committed
feat: ground reviews with semantic retrieval and structured analyzer signals
1 parent 42f5ae9 commit 923c3aa

23 files changed

Lines changed: 2278 additions & 209 deletions

src/adapters/llm.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,18 @@ pub trait LLMAdapter: Send + Sync {
162162
async fn complete(&self, request: LLMRequest) -> Result<LLMResponse>;
163163
fn model_name(&self) -> &str;
164164

165+
/// Embed one or more texts for semantic retrieval and feedback learning.
///
/// This is the trait's default body: it ignores `_texts` and always returns
/// an error whose message includes `self.model_name()`. Adapters that can
/// produce embeddings override it.
166+
async fn embed(&self, _texts: &[String]) -> Result<Vec<Vec<f32>>> {
167+
Err(anyhow::anyhow!(
168+
"Embeddings are not supported by adapter for model {}",
169+
self.model_name()
170+
))
171+
}
172+
173+
/// Capability flag paired with `embed`.
///
/// The default returns `false`, matching the default `embed`, which always
/// returns an error.
fn supports_embeddings(&self) -> bool {
174+
false
175+
}
176+
165177
/// Multi-turn chat with tool use support.
166178
/// Default impl flattens to a single `complete()` call (no tool support).
167179
async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {

src/adapters/openai.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ struct OpenAIResponsesRequest {
3333
max_output_tokens: usize,
3434
}
3535

36+
/// JSON request body that `OpenAIAdapter::embed` posts to `{base_url}/embeddings`.
#[derive(Serialize)]
37+
struct OpenAIEmbeddingRequest {
38+
/// Model identifier; `embed` fills this from `self.config.model_name`.
model: String,
39+
/// Texts to embed; `embed` copies the caller's slice into this field.
input: Vec<String>,
40+
}
41+
3642
#[derive(Serialize, Deserialize)]
3743
struct Message {
3844
role: String,
@@ -88,6 +94,17 @@ struct OpenAIResponsesUsage {
8894
total_tokens: usize,
8995
}
9096

97+
/// Deserialized response body of the embeddings request made by `OpenAIAdapter::embed`.
#[derive(Deserialize)]
98+
struct OpenAIEmbeddingResponse {
99+
/// Returned embedding entries; `embed` sorts these by `index` before use.
data: Vec<OpenAIEmbeddingData>,
100+
}
101+
102+
/// One entry of `OpenAIEmbeddingResponse::data`.
#[derive(Deserialize)]
103+
struct OpenAIEmbeddingData {
104+
/// The embedding vector returned for one input.
embedding: Vec<f32>,
105+
/// Sort key used by `embed` to order the results.
/// NOTE(review): presumably the position of the matching input text in the
/// request — confirm against the API reference.
index: usize,
106+
}
107+
91108
// === Chat API types (for tool use / function calling) ===
92109

93110
#[derive(Serialize)]
@@ -216,6 +233,41 @@ impl LLMAdapter for OpenAIAdapter {
216233
&self.config.model_name
217234
}
218235

236+
async fn embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
237+
if texts.is_empty() {
238+
return Ok(Vec::new());
239+
}
240+
241+
let request = OpenAIEmbeddingRequest {
242+
model: self.config.model_name.clone(),
243+
input: texts.to_vec(),
244+
};
245+
246+
let url = format!("{}/embeddings", self.base_url);
247+
let response = common::send_with_retry_config("OpenAI", &self.retry_config, &mut || {
248+
self.client
249+
.post(&url)
250+
.header("Authorization", format!("Bearer {}", self.api_key))
251+
.header("Content-Type", "application/json")
252+
.json(&request)
253+
})
254+
.await
255+
.context("Failed to send embedding request to OpenAI")?;
256+
257+
let embedding_response: OpenAIEmbeddingResponse = response
258+
.json()
259+
.await
260+
.context("Failed to parse OpenAI embedding response")?;
261+
262+
let mut data = embedding_response.data;
263+
data.sort_by_key(|item| item.index);
264+
Ok(data.into_iter().map(|item| item.embedding).collect())
265+
}
266+
267+
/// Always `true`: this adapter provides its own `embed` implementation.
fn supports_embeddings(&self) -> bool {
268+
true
269+
}
270+
219271
async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
220272
let mut messages: Vec<OpenAIChatMessage> = Vec::new();
221273

src/commands/misc.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ pub async fn lsp_check_command(path: PathBuf, config: config::Config) -> Result<
166166
}
167167

168168
pub async fn feedback_command(
169-
config: config::Config,
169+
mut config: config::Config,
170170
accept: Option<PathBuf>,
171171
reject: Option<PathBuf>,
172172
feedback_path: Option<PathBuf>,
@@ -180,6 +180,7 @@ pub async fn feedback_command(
180180
};
181181

182182
let feedback_path = feedback_path.unwrap_or_else(|| config.feedback_path.clone());
183+
config.feedback_path = feedback_path.clone();
183184
let content = tokio::fs::read_to_string(&input_path).await?;
184185
let mut comments: Vec<core::Comment> = serde_json::from_str(&content)?;
185186

@@ -209,6 +210,11 @@ pub async fn feedback_command(
209210
action
210211
);
211212

213+
let is_accepted = action == "accept";
214+
for comment in &comments {
215+
let _ = review::record_semantic_feedback_example(&config, comment, is_accepted).await;
216+
}
217+
212218
// Also record in the convention store for learned suppression/boost patterns
213219
let convention_path = resolve_convention_store_path_for_feedback(&config);
214220
if let Some(ref cpath) = convention_path {
@@ -218,7 +224,6 @@ pub async fn feedback_command(
218224
.and_then(|j| ConventionStore::from_json(j).ok())
219225
.unwrap_or_default();
220226
let now = chrono::Utc::now().to_rfc3339();
221-
let is_accepted = action == "accept";
222227
for comment in &comments {
223228
let file_patterns = review::derive_file_patterns(&comment.file_path);
224229
cstore.record_feedback(

src/commands/review.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ pub async fn review_command(
9292
.await?;
9393

9494
// Run pre-analyzers to get additional context
95-
let analyzer_chunks = plugin_manager
95+
let pre_analysis = plugin_manager
9696
.run_pre_analyzers(diff, &repo_path_str)
9797
.await?;
98-
context_chunks.extend(analyzer_chunks);
98+
context_chunks.extend(pre_analysis.context_chunks);
9999

100100
// Extract symbols from diff and fetch their definitions
101101
let symbols = crate::review::extract_symbols_from_diff(diff);

src/commands/smart_review.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ pub async fn smart_review_command(
153153
.await?;
154154

155155
// Run pre-analyzers to get additional context
156-
let analyzer_chunks = plugin_manager
156+
let pre_analysis = plugin_manager
157157
.run_pre_analyzers(diff, &repo_path_str)
158158
.await?;
159-
context_chunks.extend(analyzer_chunks);
159+
context_chunks.extend(pre_analysis.context_chunks);
160160

161161
// Get path-specific configuration
162162
let path_config = config.get_path_config(&diff.file_path);

src/config.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,32 @@ pub struct Config {
314314
/// Minimum number of feedback observations before adjusting confidence (default 5).
315315
#[serde(default = "default_feedback_min_observations")]
316316
pub feedback_min_observations: usize,
317+
318+
/// Enable semantic repository retrieval for related code context.
319+
#[serde(default = "default_false")]
320+
pub semantic_rag: bool,
321+
322+
#[serde(default = "default_semantic_rag_max_files")]
323+
pub semantic_rag_max_files: usize,
324+
325+
#[serde(default = "default_semantic_rag_top_k")]
326+
pub semantic_rag_top_k: usize,
327+
328+
#[serde(default = "default_semantic_rag_min_similarity")]
329+
pub semantic_rag_min_similarity: f32,
330+
331+
/// Enable embedding-backed feedback memory on top of aggregate stats.
332+
#[serde(default)]
333+
pub semantic_feedback: bool,
334+
335+
#[serde(default = "default_semantic_feedback_similarity")]
336+
pub semantic_feedback_similarity: f32,
337+
338+
#[serde(default = "default_semantic_feedback_min_examples")]
339+
pub semantic_feedback_min_examples: usize,
340+
341+
#[serde(default = "default_semantic_feedback_max_neighbors")]
342+
pub semantic_feedback_max_neighbors: usize,
317343
}
318344

319345
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -482,6 +508,14 @@ impl Default for Config {
482508
verification_max_comments: default_verification_max_comments(),
483509
enhanced_feedback: false,
484510
feedback_min_observations: default_feedback_min_observations(),
511+
semantic_rag: false,
512+
semantic_rag_max_files: default_semantic_rag_max_files(),
513+
semantic_rag_top_k: default_semantic_rag_top_k(),
514+
semantic_rag_min_similarity: default_semantic_rag_min_similarity(),
515+
semantic_feedback: false,
516+
semantic_feedback_similarity: default_semantic_feedback_similarity(),
517+
semantic_feedback_min_examples: default_semantic_feedback_min_examples(),
518+
semantic_feedback_max_neighbors: default_semantic_feedback_max_neighbors(),
485519
}
486520
}
487521
}
@@ -914,6 +948,28 @@ impl Config {
914948
if self.feedback_suppression_threshold == 0 {
915949
self.feedback_suppression_threshold = default_feedback_suppression_threshold();
916950
}
951+
if self.semantic_rag_max_files == 0 {
952+
self.semantic_rag_max_files = default_semantic_rag_max_files();
953+
}
954+
if self.semantic_rag_top_k == 0 {
955+
self.semantic_rag_top_k = default_semantic_rag_top_k();
956+
}
957+
if !self.semantic_rag_min_similarity.is_finite() {
958+
self.semantic_rag_min_similarity = default_semantic_rag_min_similarity();
959+
} else {
960+
self.semantic_rag_min_similarity = self.semantic_rag_min_similarity.clamp(0.0, 1.0);
961+
}
962+
if !self.semantic_feedback_similarity.is_finite() {
963+
self.semantic_feedback_similarity = default_semantic_feedback_similarity();
964+
} else {
965+
self.semantic_feedback_similarity = self.semantic_feedback_similarity.clamp(0.0, 1.0);
966+
}
967+
if self.semantic_feedback_min_examples == 0 {
968+
self.semantic_feedback_min_examples = default_semantic_feedback_min_examples();
969+
}
970+
if self.semantic_feedback_max_neighbors == 0 {
971+
self.semantic_feedback_max_neighbors = default_semantic_feedback_max_neighbors();
972+
}
917973
}
918974

919975
pub fn get_path_config(&self, file_path: &Path) -> Option<&PathConfig> {
@@ -1300,6 +1356,30 @@ fn default_feedback_min_observations() -> usize {
13001356
5
13011357
}
13021358

1359+
/// Default for `Config::semantic_rag_max_files` (500).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is 0.
fn default_semantic_rag_max_files() -> usize {
1360+
500
1361+
}
1362+
1363+
/// Default for `Config::semantic_rag_top_k` (5).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is 0.
fn default_semantic_rag_top_k() -> usize {
1364+
5
1365+
}
1366+
1367+
/// Default for `Config::semantic_rag_min_similarity` (0.25).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is not finite.
fn default_semantic_rag_min_similarity() -> f32 {
1368+
0.25
1369+
}
1370+
1371+
/// Default for `Config::semantic_feedback_similarity` (0.82).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is not finite.
fn default_semantic_feedback_similarity() -> f32 {
1372+
0.82
1373+
}
1374+
1375+
/// Default for `Config::semantic_feedback_min_examples` (3).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is 0.
fn default_semantic_feedback_min_examples() -> usize {
1376+
3
1377+
}
1378+
1379+
/// Default for `Config::semantic_feedback_max_neighbors` (8).
///
/// Referenced by the field's serde default and by `Default for Config`; the
/// config normalization also substitutes it when the field is 0.
fn default_semantic_feedback_max_neighbors() -> usize {
1380+
8
1381+
}
1382+
13031383
fn normalize_comment_types(values: &[String]) -> Vec<String> {
13041384
if values.is_empty() {
13051385
return default_comment_types();

src/core/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod pr_history;
2020
pub mod pr_summary;
2121
pub mod prompt;
2222
pub mod rules;
23+
pub mod semantic;
2324
pub mod smart_review_prompt;
2425
pub mod symbol_graph;
2526
pub mod symbol_index;
@@ -36,5 +37,11 @@ pub use git::{validate_ref_name, GitIntegration};
3637
pub use pr_summary::{PRSummaryGenerator, SummaryOptions};
3738
pub use prompt::{PromptBuilder, SpecializedPassKind};
3839
pub use rules::{active_rules_for_file, load_rules_from_patterns, ReviewRule};
40+
pub use semantic::{
41+
build_feedback_embedding_text, default_index_path, default_semantic_feedback_path,
42+
embed_texts_with_fallback, find_similar_feedback_examples, load_semantic_feedback_store,
43+
refresh_semantic_index, save_semantic_feedback_store, semantic_context_for_diff,
44+
SemanticFeedbackExample, SemanticFeedbackStore,
45+
};
3946
pub use smart_review_prompt::SmartReviewPromptBuilder;
4047
pub use symbol_index::SymbolIndex;

src/core/prompt.rs

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,19 @@ fn shared_review_principles() -> &'static str {
6969
fn shared_output_contract(category_label: &str, no_issues_message: &str) -> String {
7070
format!(
7171
r#"Response contract:
72-
- Format every finding as:
73-
Line [number]{{ [rule:<id>] optional}}: [{category_label}] - [specific problem]. [Impact]. [Smallest safe fix].
74-
- For concrete local fixes, add this block immediately after the finding:
75-
<<<ORIGINAL
76-
<code copied from the diff>
77-
===
78-
<improved code>
79-
>>>SUGGESTED
80-
- If no relevant issues are found, respond with: {no_issues_message}"#
72+
- Preferred format: return a JSON array only. Each finding object must use this schema:
73+
{{"line": 42, "category": "{category_label}", "issue": "specific problem", "impact": "why it matters", "fix": "smallest safe fix", "rule_id": "optional.rule.id", "severity": "warning", "confidence": 0.91, "fix_effort": "low", "tags": ["optional-tag"], "original_code": "optional", "suggested_code": "optional"}}
74+
- Only include `original_code` and `suggested_code` when you can quote a precise local edit from the diff.
75+
- If no relevant issues are found, return `[]`.
76+
- Fallback only if strict JSON is impossible:
77+
Line [number]{{ [rule:<id>] optional}}: [{category_label}] - [specific problem]. [Impact]. [Smallest safe fix].
78+
- For concrete local fixes in fallback mode, add this block immediately after the finding:
79+
<<<ORIGINAL
80+
<code copied from the diff>
81+
===
82+
<improved code>
83+
>>>SUGGESTED
84+
- If fallback mode finds no relevant issues, respond with: {no_issues_message}"#
8185
)
8286
}
8387

@@ -471,6 +475,16 @@ mod tests {
471475
.contains("bugs, security vulnerabilities, or performance issues"));
472476
}
473477

478+
// Verifies that the default `PromptConfig` system prompt carries the
// JSON-first response contract: the "preferred format" sentence, the example
// schema's `"line": 42` field, and the empty-array instruction for "no findings".
#[test]
479+
fn default_prompt_prefers_json_output_contract() {
480+
let config = PromptConfig::default();
481+
assert!(config
482+
.system_prompt
483+
.contains("Preferred format: return a JSON array only"));
484+
assert!(config.system_prompt.contains("\"line\": 42"));
485+
assert!(config.system_prompt.contains("return `[]`"));
486+
}
487+
474488
#[test]
475489
fn security_prompt_focuses_on_security() {
476490
let prompt = build_security_prompt();

0 commit comments

Comments
 (0)