Skip to content

Commit 0161b0a

Browse files
committed
Strengthen rule-id extraction and surface rules in outputs
1 parent edca997 commit 0161b0a

File tree

3 files changed

+68
-7
lines changed

3 files changed

+68
-7
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ diffscope pr --post-comments
120120
```
121121

122122
`--post-comments` now attempts inline file/line comments first, falls back to PR-level comments when GitHub rejects an anchor, and upserts a sticky DiffScope summary comment on the PR.
123+
When rule matching is active, DiffScope also includes detected `rule_id` values in PR comments and summaries.
123124

124125
### Evaluation Fixtures
125126
```yaml

src/core/prompt.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,14 @@ Review the code changes below and identify specific issues. Focus on problems th
4040
- Line number where the issue occurs
4141
- Clear description of the problem
4242
- Impact if not addressed
43+
- Optional rule id when a scoped review rule applies
4344
- Suggested fix (if applicable)
4445
4546
Format each issue as:
46-
Line [number]: [Issue type] - [Description]. [Impact]. [Suggestion if applicable].
47+
Line [number] [rule:<id> optional]: [Issue type] - [Description]. [Impact]. [Suggestion if applicable].
4748
4849
Examples:
49-
Line 42: Security - User input passed directly to SQL query. Risk of SQL injection. Use parameterized queries.
50+
Line 42 [rule:sec.sql.injection]: Security - User input passed directly to SQL query. Risk of SQL injection. Use parameterized queries.
5051
Line 13: Bug - Missing null check before dereferencing pointer. May cause crash. Add null validation.
5152
Line 28: Performance - O(n²) algorithm for large dataset. Will be slow with many items. Consider using a hash map.
5253
</instructions>"#.to_string(),

src/main.rs

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,9 @@ fn build_github_comment_body(comment: &core::Comment) -> String {
936936
"**{:?} ({:?})**\n\n{}",
937937
comment.severity, comment.category, comment.content
938938
);
939+
if let Some(rule_id) = &comment.rule_id {
940+
body.push_str(&format!("\n\n**Rule:** `{}`", rule_id));
941+
}
939942
if let Some(suggestion) = &comment.suggestion {
940943
body.push_str("\n\n**Suggested fix:** ");
941944
body.push_str(suggestion);
@@ -1123,11 +1126,17 @@ fn build_pr_summary_comment_body(comments: &[core::Comment]) -> String {
11231126
if shown >= 5 {
11241127
break;
11251128
}
1129+
let rule_suffix = comment
1130+
.rule_id
1131+
.as_deref()
1132+
.map(|rule_id| format!(" rule:{}", rule_id))
1133+
.unwrap_or_default();
11261134
body.push_str(&format!(
1127-
"- `{}:{}` [{:?}] {}\n",
1135+
"- `{}:{}` [{:?}{}] {}\n",
11281136
comment.file_path.display(),
11291137
comment.line_number,
11301138
comment.severity,
1139+
rule_suffix,
11311140
comment.content
11321141
));
11331142
}
@@ -2230,8 +2239,9 @@ async fn review_diff_content_raw(
22302239

22312240
fn parse_llm_response(content: &str, file_path: &Path) -> Result<Vec<core::comment::RawComment>> {
22322241
let mut comments = Vec::new();
2233-
static LINE_PATTERN: Lazy<Regex> =
2234-
Lazy::new(|| Regex::new(r"(?i)line\s+(\d+):\s*(.+)").unwrap());
2242+
static LINE_PATTERN: Lazy<Regex> = Lazy::new(|| {
2243+
Regex::new(r"(?i)line\s+(\d+)((?:\s*(?:\[[^\]]+\]|\([^)]+\)))*)\s*:\s*(.+)").unwrap()
2244+
});
22352245

22362246
for line in content.lines() {
22372247
let trimmed = line.trim();
@@ -2250,8 +2260,11 @@ fn parse_llm_response(content: &str, file_path: &Path) -> Result<Vec<core::comme
22502260

22512261
if let Some(caps) = LINE_PATTERN.captures(line) {
22522262
let line_number: usize = caps.get(1).unwrap().as_str().parse()?;
2253-
let comment_text = caps.get(2).unwrap().as_str().trim();
2254-
let (rule_id, comment_text) = extract_rule_id_from_text(comment_text);
2263+
let metadata = caps.get(2).map(|value| value.as_str()).unwrap_or("");
2264+
let comment_text = caps.get(3).unwrap().as_str().trim();
2265+
let (inline_rule_id, comment_text) = extract_rule_id_from_text(comment_text);
2266+
let metadata_rule_id = extract_rule_id_from_metadata(metadata);
2267+
let rule_id = inline_rule_id.or(metadata_rule_id);
22552268

22562269
// Extract suggestion if present
22572270
let (content, suggestion) = if let Some(sugg_idx) = comment_text.rfind(". Consider ") {
@@ -2324,6 +2337,9 @@ fn format_as_patch(comments: &[core::Comment]) -> String {
23242337
comment.severity,
23252338
comment.content
23262339
));
2340+
if let Some(rule_id) = &comment.rule_id {
2341+
output.push_str(&format!("# Rule: {}\n", rule_id));
2342+
}
23272343
if let Some(suggestion) = &comment.suggestion {
23282344
output.push_str(&format!("# Suggestion: {}\n", suggestion));
23292345
}
@@ -2454,6 +2470,9 @@ fn format_as_markdown(comments: &[core::Comment]) -> String {
24542470
"**Confidence:** {:.0}%\n",
24552471
comment.confidence * 100.0
24562472
));
2473+
if let Some(rule_id) = &comment.rule_id {
2474+
output.push_str(&format!("**Rule:** `{}`\n", rule_id));
2475+
}
24572476
output.push_str(&format!("**Fix Effort:** {}\n\n", effort_badge));
24582477

24592478
output.push_str(&format!("{}\n\n", comment.content));
@@ -2979,6 +2998,20 @@ fn extract_rule_id_from_text(text: &str) -> (Option<String>, String) {
29792998
(None, text.trim().to_string())
29802999
}
29813000

3001+
fn extract_rule_id_from_metadata(metadata: &str) -> Option<String> {
3002+
static META_RULE: Lazy<Regex> =
3003+
Lazy::new(|| Regex::new(r"(?i)rule\s*[:=]\s*([a-z0-9_.-]+)").unwrap());
3004+
3005+
META_RULE
3006+
.captures(metadata)
3007+
.and_then(|captures| {
3008+
captures
3009+
.get(1)
3010+
.map(|value| value.as_str().trim().to_string())
3011+
})
3012+
.filter(|value| !value.is_empty())
3013+
}
3014+
29823015
fn format_smart_review_output(
29833016
comments: &[core::Comment],
29843017
summary: &core::comment::ReviewSummary,
@@ -3169,6 +3202,9 @@ fn format_detailed_comment(comment: &core::Comment) -> String {
31693202
}
31703203
output.push_str("\n\n");
31713204
}
3205+
if let Some(rule_id) = &comment.rule_id {
3206+
output.push_str(&format!("**Rule:** `{}`\n\n", rule_id));
3207+
}
31723208

31733209
output.push_str(&format!("{}\n\n", comment.content));
31743210

@@ -4587,6 +4623,29 @@ TAGS: auth, security
45874623
assert_eq!(comment.fix_effort, Some(core::comment::FixEffort::High));
45884624
}
45894625

4626+
#[test]
4627+
fn parse_llm_response_extracts_rule_from_line_metadata() {
4628+
let input = "Line 12 [rule:sec.sql.injection]: Security - Raw SQL with user input.";
4629+
let file_path = PathBuf::from("src/lib.rs");
4630+
let comments = parse_llm_response(input, &file_path).unwrap();
4631+
assert_eq!(comments.len(), 1);
4632+
assert_eq!(comments[0].line_number, 12);
4633+
assert_eq!(comments[0].rule_id.as_deref(), Some("sec.sql.injection"));
4634+
}
4635+
4636+
#[test]
4637+
fn format_patch_includes_rule_id() {
4638+
let mut comment = build_comment(
4639+
"rule-patch",
4640+
core::comment::Category::Security,
4641+
core::comment::Severity::Warning,
4642+
0.9,
4643+
);
4644+
comment.rule_id = Some("sec.auth.guard".to_string());
4645+
let patch = format_as_patch(&[comment]);
4646+
assert!(patch.contains("# Rule: sec.auth.guard"));
4647+
}
4648+
45904649
#[test]
45914650
fn comment_type_filter_keeps_only_enabled_types() {
45924651
let comments = vec![

0 commit comments

Comments
 (0)