Skip to content

Commit e3b2b84

Browse files
authored
feat(parsing): single-quote JSON repair + raw bracket span (#28) (#45)
- repair_json_candidates: convert single-quoted keys/values to double-quoted via convert_single_quoted_json_to_double (issue #28)
- find_balanced_bracket_span: extract raw [..] or {..} when valid JSON not found, so repair can fix and parse
- ROADMAP: document #28 repairs; add #25 (dynamic context) to Shipped
- Test: parse_json_with_single_quotes

Made-with: Cursor
1 parent 5af8a45 commit e3b2b84

File tree

2 files changed

+108
-1
lines changed

2 files changed

+108
-1
lines changed

docs/ROADMAP.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ Create labels once: `priority: high`, `priority: medium`, `priority: low`, `area
7474

7575
- **Natural language rules (#12):** `review_rules_prose: [ "Rule one", "Rule two" ]` in config; injected as "Custom rules (natural language)" bullets into review guidance. Tests: `test_config_deserialize_review_rules_prose_from_yaml`, `build_review_guidance_includes_prose_rules`.
7676
- **Triage skip deletion-only (#29):** `triage_skip_deletion_only: true` in config; when true, deletion-only diffs get `SkipDeletionOnly` and skip expensive review. Default false. Tests: `test_triage_deletion_only_with_skip_true_returns_skip_deletion_only`, config deserialize.
77-
- **LLM parsing (#28):** Repair candidate for diff-style line prefixes (`+` on each line) in `repair_json_candidates`; test `parse_json_with_diff_prefix_artifact`.
77+
- **Dynamic context (#25):** `find_enclosing_boundary_line` in `function_chunker.rs`; `context.rs` expands hunk start to enclosing function/class boundary; asymmetric context (5 before, 1 after).
78+
- **LLM parsing (#28):** Repair candidates in `repair_json_candidates`: diff-style line prefixes (`+`), single-quoted keys/values → double-quoted via `convert_single_quoted_json_to_double`; raw bracket span fallback when valid JSON not found. Tests: `parse_json_with_diff_prefix_artifact`, `parse_json_with_single_quotes`.
7879
- **Secrets (#20):** Built-in secret scanner in `plugins/builtin/secret_scanner.rs`.
7980
- **Verification (#23):** Verification pass and config (verification.*) in pipeline.
8081

src/parsing/llm_response.rs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ fn parse_json_format(content: &str, file_path: &Path) -> Vec<core::comment::RawC
257257
.or_else(|| find_json_array(content))
258258
.or_else(|| find_json_object(content));
259259

260+
let json_str = json_str
261+
.or_else(|| find_balanced_bracket_span(content, '[', ']'))
262+
.or_else(|| find_balanced_bracket_span(content, '{', '}'));
263+
260264
if let Some(json_str) = json_str {
261265
for candidate in repair_json_candidates(&json_str) {
262266
let Ok(value) = serde_json::from_str::<serde_json::Value>(&candidate) else {
@@ -305,6 +309,26 @@ fn find_json_object(content: &str) -> Option<String> {
305309
find_balanced_json(content, '{', '}')
306310
}
307311

312+
/// Find the first balanced `open`…`close` span (e.g. `[`…`]`) without validating it as JSON.
///
/// Used when valid JSON isn't found so we can run repair (e.g. single-quote conversion) and retry.
///
/// Every occurrence of `open` is tried, in order, as a starting point. For each start the scan
/// first ignores brackets that sit inside single- or double-quoted strings, so text such as
/// `'missing ] here'` cannot close the span early. If the quoting itself is broken (for example
/// a stray apostrophe that never closes), the same start is re-scanned counting every bracket.
///
/// Returns the span including both delimiters, or `None` when no start yields a balanced span.
fn find_balanced_bracket_span(content: &str, open: char, close: char) -> Option<String> {
    content
        .char_indices()
        .filter(|&(_, ch)| ch == open)
        .find_map(|(start, _)| {
            let rest = &content[start..];
            balanced_span_len(rest, open, close, true)
                .or_else(|| balanced_span_len(rest, open, close, false))
                .map(|len| rest[..len].to_string())
        })
}

/// Byte length of the balanced span at the start of `span`, or `None` if depth never returns to zero.
///
/// With `respect_quotes` set, everything between a `'` or `"` and its matching unescaped
/// terminator is skipped, so delimiters inside string literals do not affect the depth.
fn balanced_span_len(span: &str, open: char, close: char, respect_quotes: bool) -> Option<usize> {
    let mut depth = 0usize;
    let mut quote: Option<char> = None;
    let mut escaped = false;

    for (offset, ch) in span.char_indices() {
        if let Some(terminator) = quote {
            if escaped {
                // The character after a backslash never terminates the string.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == terminator {
                quote = None;
            }
            continue;
        }

        if respect_quotes && (ch == '"' || ch == '\'') {
            quote = Some(ch);
        } else if ch == open {
            depth += 1;
        } else if ch == close && depth > 0 {
            depth -= 1;
            if depth == 0 {
                // `len_utf8` keeps the slice on a char boundary even for a multi-byte `close`.
                return Some(offset + ch.len_utf8());
            }
        }
    }
    None
}
331+
308332
fn find_balanced_json(content: &str, open: char, close: char) -> Option<String> {
309333
for (start, _) in content.char_indices().filter(|&(_, ch)| ch == open) {
310334
let mut depth = 0i32;
@@ -368,9 +392,80 @@ fn repair_json_candidates(candidate: &str) -> Vec<String> {
368392
candidates.push(without_diff_prefix.to_string());
369393
}
370394

395+
// When LLM outputs single-quoted keys/values (e.g. {'line': 9}), convert to valid JSON (issue #28).
396+
let with_double_quotes = convert_single_quoted_json_to_double(trimmed);
397+
if with_double_quotes != trimmed
398+
&& (with_double_quotes.starts_with('[') || with_double_quotes.starts_with('{'))
399+
{
400+
candidates.push(with_double_quotes);
401+
}
402+
371403
candidates
372404
}
373405

406+
/// Convert single-quoted JSON-like strings to double-quoted so serde_json can parse.
///
/// Only converts single-quoted regions that are outside any double-quoted string;
/// double-quoted strings (including their escape sequences) are copied through untouched.
///
/// Inside a single-quoted region:
/// - `\'` becomes a bare `'` (JSON has no `\'` escape);
/// - a bare `"` becomes `\"` so it cannot terminate the new double-quoted string;
/// - every other backslash escape (`\n`, `\\`, `\"`, `\uXXXX`, …) is copied as a unit, so the
///   escaped character is never mistaken for the closing quote.
///
/// An unterminated single-quoted region is emitted with its opening `"` and no closing quote;
/// the result is then simply not valid JSON and the caller's parse attempt rejects it.
fn convert_single_quoted_json_to_double(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        match c {
            '"' => {
                // Already a JSON string: copy verbatim up to the unescaped closing quote.
                out.push('"');
                while let Some(inner) = chars.next() {
                    out.push(inner);
                    match inner {
                        '\\' => {
                            // Keep the escaped character attached to its backslash.
                            if let Some(escaped) = chars.next() {
                                out.push(escaped);
                            }
                        }
                        '"' => break,
                        _ => {}
                    }
                }
            }
            '\'' => {
                out.push('"');
                while let Some(inner) = chars.next() {
                    match inner {
                        '\\' => match chars.next() {
                            // `\'` is only meaningful in single-quoted form; drop the backslash.
                            Some('\'') => out.push('\''),
                            Some(escaped) => {
                                out.push('\\');
                                out.push(escaped);
                            }
                            // Trailing backslash at end of input: preserve it as-is.
                            None => out.push('\\'),
                        },
                        '\'' => {
                            out.push('"');
                            break;
                        }
                        '"' => out.push_str("\\\""),
                        other => out.push(other),
                    }
                }
            }
            other => out.push(other),
        }
    }
    out
}
468+
374469
fn extract_structured_items(value: serde_json::Value) -> Vec<serde_json::Value> {
375470
if let Some(items) = value.as_array() {
376471
return items.clone();
@@ -1243,6 +1338,17 @@ let data = &input;
12431338
assert!(comments[0].content.contains("Missing check"));
12441339
}
12451340

1341+
#[test]
fn parse_json_with_single_quotes() {
    // Regression for issue #28: an LLM may emit Python-style single-quoted keys/values;
    // the repair pass is expected to rewrite them as double-quoted JSON before parsing.
    let file_path = PathBuf::from("src/lib.rs");
    let input = r#"[{'line': 9, 'issue': 'Use of deprecated API'}]"#;

    let comments = parse_llm_response(input, &file_path).unwrap();

    assert_eq!(comments.len(), 1);
    let only = &comments[0];
    assert_eq!(only.line_number, 9);
    assert!(only.content.contains("deprecated"));
}
1351+
12461352
// ── Bug: find_json_array uses mismatched brackets ──────────────────
12471353
//
12481354
// `find_json_array` uses `find('[')` (first) + `rfind(']')` (last).

0 commit comments

Comments
 (0)