Skip to content

Commit 5bf1ca7

Browse files
haasonsaasclaude
andcommitted
fix: 8 bugs in security detection — UTF-8 panic, false positives, detection gaps
- Fix redact() panic on multi-byte UTF-8 by using char_indices() for safe boundaries - Fix RE_JWT matching literal backslash (not valid in JWT tokens) - Fix connection string regex missing postgresql:// scheme (SQLAlchemy standard) - Fix "des " false positive matching "nodes ", "codes ", etc. — use word boundaries - Fix CWE tag extraction only finding first occurrence — now extracts all - Fix generate_code_suggestion matching "use" inside "because"/"refuse"/etc. - Remove dead Option wrapper from process_raw_comment (always returned Some) - Fix "doc" miscategorizing "docker" as Documentation — use "documentation"/"docstring" - Tighten overly-broad security keywords: missing header, file upload, input validation Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 118e083 commit 5bf1ca7

2 files changed

Lines changed: 74 additions & 31 deletions

File tree

src/core/comment.rs

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,7 @@ impl CommentSynthesizer {
132132
let mut comments = Vec::new();
133133

134134
for raw in raw_comments {
135-
if let Some(comment) = Self::process_raw_comment(raw)? {
136-
comments.push(comment);
137-
}
135+
comments.push(Self::process_raw_comment(raw)?);
138136
}
139137

140138
Self::deduplicate_comments(&mut comments);
@@ -177,7 +175,7 @@ impl CommentSynthesizer {
177175
}
178176
}
179177

180-
fn process_raw_comment(raw: RawComment) -> Result<Option<Comment>> {
178+
fn process_raw_comment(raw: RawComment) -> Result<Comment> {
181179
let lower = raw.content.to_lowercase();
182180
let severity = raw
183181
.severity
@@ -203,7 +201,7 @@ impl CommentSynthesizer {
203201
let code_suggestion = Self::generate_code_suggestion(&raw);
204202
let id = Self::generate_comment_id(&raw.file_path, &raw.content, &category);
205203

206-
Ok(Some(Comment {
204+
Ok(Comment {
207205
id,
208206
file_path: raw.file_path,
209207
line_number: raw.line_number,
@@ -217,7 +215,7 @@ impl CommentSynthesizer {
217215
tags,
218216
fix_effort,
219217
feedback: None,
220-
}))
218+
})
221219
}
222220

223221
fn generate_comment_id(file_path: &Path, content: &str, category: &Category) -> String {
@@ -306,7 +304,7 @@ impl CommentSynthesizer {
306304
|| lower.contains("verbose error")
307305
|| lower.contains("information disclosure")
308306
|| lower.contains("security header")
309-
|| lower.contains("missing header")
307+
|| lower.contains("missing security header")
310308
// Unsafe code patterns
311309
|| lower.contains("unsafe block")
312310
|| lower.contains("unsafe {")
@@ -331,9 +329,10 @@ impl CommentSynthesizer {
331329
|| lower.contains("no pagination")
332330
|| lower.contains("unbounded query")
333331
|| lower.contains("graphql depth")
334-
|| lower.contains("file upload")
335332
|| lower.contains("insecure upload")
336-
|| lower.contains("input validation")
333+
|| lower.contains("unrestricted upload")
334+
|| (lower.contains("file upload") && (lower.contains("insecure") || lower.contains("unrestricted") || lower.contains("vulnerability") || lower.contains("security")))
335+
|| (lower.contains("input validation") && (lower.contains("missing") || lower.contains("vulnerability") || lower.contains("security") || lower.contains("injection")))
337336
{
338337
Category::Security
339338
} else if lower.contains("performance")
@@ -345,7 +344,10 @@ impl CommentSynthesizer {
345344
Category::Bug
346345
} else if lower.contains("style") || lower.contains("format") || lower.contains("naming") {
347346
Category::Style
348-
} else if lower.contains("doc") || lower.contains("comment") {
347+
} else if lower.contains("documentation")
348+
|| lower.contains("docstring")
349+
|| lower.contains("comment")
350+
{
349351
Category::Documentation
350352
} else if lower.contains("test") || lower.contains("coverage") {
351353
Category::Testing
@@ -460,7 +462,9 @@ impl CommentSynthesizer {
460462

461463
// ── Cryptography ──
462464
if lower.contains("weak cipher")
463-
|| lower.contains("des ")
465+
|| lower.contains(" des ")
466+
|| lower.starts_with("des ")
467+
|| lower.contains("3des")
464468
|| lower.contains("rc4")
465469
|| lower.contains("ecb mode")
466470
{
@@ -699,7 +703,9 @@ impl CommentSynthesizer {
699703

700704
// ── Cryptography tags ──
701705
if lower.contains("weak cipher")
702-
|| lower.contains("des ")
706+
|| lower.contains(" des ")
707+
|| lower.starts_with("des ")
708+
|| lower.contains("3des")
703709
|| lower.contains("rc4")
704710
|| lower.contains("blowfish")
705711
{
@@ -743,7 +749,7 @@ impl CommentSynthesizer {
743749
if lower.contains("debug mode") {
744750
tags.push("debug-mode".to_string());
745751
}
746-
if lower.contains("security header") || lower.contains("missing header") {
752+
if lower.contains("security header") || lower.contains("missing security header") {
747753
tags.push("security-headers".to_string());
748754
}
749755
if lower.contains("information disclosure") || lower.contains("data exposure") {
@@ -823,15 +829,20 @@ impl CommentSynthesizer {
823829
}
824830

825831
// ── CWE / OWASP tags ──
826-
// Extract CWE numbers from content
827-
if let Some(pos) = lower.find("cwe-") {
828-
let cwe_rest = &lower[pos..];
829-
let cwe_tag: String = cwe_rest
830-
.chars()
831-
.take_while(|c| c.is_alphanumeric() || *c == '-')
832-
.collect();
833-
if cwe_tag.len() > 4 {
834-
tags.push(cwe_tag);
832+
// Extract all CWE numbers from content
833+
{
834+
let mut search_from = 0;
835+
while let Some(offset) = lower[search_from..].find("cwe-") {
836+
let pos = search_from + offset;
837+
let cwe_rest = &lower[pos..];
838+
let cwe_tag: String = cwe_rest
839+
.chars()
840+
.take_while(|c| c.is_alphanumeric() || *c == '-')
841+
.collect();
842+
if cwe_tag.len() > 4 && !tags.contains(&cwe_tag) {
843+
tags.push(cwe_tag);
844+
}
845+
search_from = pos + 4; // skip past "cwe-" to find next
835846
}
836847
}
837848

@@ -898,7 +909,10 @@ impl CommentSynthesizer {
898909

899910
// Fallback: generate a basic suggestion from the textual suggestion field
900911
if let Some(suggestion) = &raw.suggestion {
901-
if suggestion.contains("use") || suggestion.contains("replace") {
912+
let has_action_word = suggestion
913+
.split_whitespace()
914+
.any(|w| w.eq_ignore_ascii_case("use") || w.eq_ignore_ascii_case("replace"));
915+
if has_action_word {
902916
return Some(CodeSuggestion {
903917
original_code: "// Original code would be extracted from context".to_string(),
904918
suggested_code: suggestion.clone(),

src/plugins/builtin/secret_scanner.rs

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,7 @@ static RE_PRIVATE_KEY: Lazy<Regex> = Lazy::new(|| {
5757
Regex::new(r"(?i)(-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?:\sBLOCK)?-----)").unwrap()
5858
});
5959
static RE_JWT: Lazy<Regex> = Lazy::new(|| {
60-
Regex::new(r"\b(ey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/\\_\-]{17,}\.[a-zA-Z0-9/\\_\-]{10,}=*)\b")
61-
.unwrap()
60+
Regex::new(r"\b(ey[a-zA-Z0-9]{17,}\.ey[a-zA-Z0-9/_-]{17,}\.[a-zA-Z0-9/_-]{10,}=*)\b").unwrap()
6261
});
6362
static RE_GCP_KEY: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b(AIza[\w\-]{35})\b").unwrap());
6463
static RE_GCP_SA: Lazy<Regex> =
@@ -73,7 +72,7 @@ static RE_SENDGRID: Lazy<Regex> =
7372
static RE_TWILIO: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b(SK[0-9a-fA-F]{32})\b").unwrap());
7473
static RE_NPM: Lazy<Regex> = Lazy::new(|| Regex::new(r"\b(npm_[a-z0-9]{36})\b").unwrap());
7574
static RE_CONN_STRING: Lazy<Regex> = Lazy::new(|| {
76-
Regex::new(r"(?i)((?:postgres|mysql|mongodb|redis|amqp|mssql)://[^:\s]+:[^@\s]+@[^\s]+)")
75+
Regex::new(r"(?i)((?:postgres(?:ql)?|mysql|mongodb|redis|amqp|mssql)://[^:\s]+:[^@\s]+@[^\s]+)")
7776
.unwrap()
7877
});
7978
static RE_GENERIC_CRED: Lazy<Regex> = Lazy::new(|| {
@@ -391,11 +390,21 @@ fn is_false_positive(value: &str) -> bool {
391390

392391
/// Redact a secret value, showing only the first few and last few chars.
393392
fn redact(value: &str) -> String {
394-
if value.len() <= 8 {
395-
return "*".repeat(value.len());
396-
}
397-
let show = 4.min(value.len() / 4);
398-
format!("{}...{}", &value[..show], &value[value.len() - show..])
393+
let char_count = value.chars().count();
394+
if char_count <= 8 {
395+
return "*".repeat(char_count);
396+
}
397+
let show = 4.min(char_count / 4);
398+
// Use char_indices to find safe byte boundaries for multi-byte UTF-8
399+
let prefix_end = value
400+
.char_indices()
401+
.nth(show)
402+
.map_or(value.len(), |(i, _)| i);
403+
let suffix_start = value
404+
.char_indices()
405+
.nth(char_count - show)
406+
.map_or(value.len(), |(i, _)| i);
407+
format!("{}...{}", &value[..prefix_end], &value[suffix_start..])
399408
}
400409

401410
pub struct SecretScanner;
@@ -569,6 +578,19 @@ mod tests {
569578
assert_eq!(findings[0].rule_id, "sec.secrets.connection-string");
570579
}
571580

581+
#[test]
582+
fn test_detects_postgresql_connection_string() {
583+
let findings = SecretScanner::scan_line(
584+
"DATABASE_URL=postgresql://admin:supersecret@db.example.com:5432/mydb",
585+
3,
586+
);
587+
assert!(
588+
!findings.is_empty(),
589+
"Should detect postgresql:// connection string"
590+
);
591+
assert_eq!(findings[0].rule_id, "sec.secrets.connection-string");
592+
}
593+
572594
#[test]
573595
fn test_ignores_placeholder() {
574596
let findings = SecretScanner::scan_line("password = \"your-secret-here\"", 1);
@@ -619,6 +641,13 @@ mod tests {
619641
assert!(redacted.contains("..."));
620642
}
621643

644+
#[test]
645+
fn test_redact_multibyte_utf8() {
646+
// Must not panic on multi-byte UTF-8 characters
647+
let redacted = redact("pässwörd_töken_sëcret_välue_here");
648+
assert!(redacted.contains("..."));
649+
}
650+
622651
#[tokio::test]
623652
async fn test_scanner_only_scans_added_lines() {
624653
let diff = make_diff_with_lines(

0 commit comments

Comments
 (0)