Skip to content

Commit 93563fc

Browse files
authored
Handle structured output review payloads (#33)
This fixes a failure mode in the review parser. Right now we only parse Claude's serialized `result` field. In recent runs, Claude sometimes returned the valid review payload in `structured_output` while `result` was empty, which made the action drop a good response and degrade into an empty review. This patch checks `structured_output` first, falls back to `result` for compatibility, and fails the run if neither contains a valid review payload. That follows the same direction as Anthropic's `claude-code-action`, which treats `structured_output` as the schema-backed output path: https://github.com/anthropics/claude-code-action/blob/main/base-action/src/run-claude-sdk.ts#L208-L228. Tests are updated to cover the `structured_output` path and the failure case.
1 parent d0af9a4 commit 93563fc

File tree

4 files changed

+208
-49
lines changed

4 files changed

+208
-49
lines changed

claudecode/github_action_audit.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,13 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[
577577

578578
# If returncode is 0, extract review findings
579579
if result.returncode == 0:
580-
parsed_results = self._extract_review_findings(parsed_result)
580+
try:
581+
parsed_results = self._extract_review_findings(parsed_result)
582+
except ValueError as error:
583+
if attempt == NUM_RETRIES - 1:
584+
return False, str(error), {}
585+
time.sleep(5 * attempt)
586+
continue
581587
return True, "", parsed_results
582588

583589
# Handle non-zero return codes after parsing
@@ -607,22 +613,28 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[
607613

608614
def _extract_review_findings(self, claude_output: Any) -> Dict[str, Any]:
609615
"""Extract review findings and PR summary from Claude's JSON response."""
610-
if isinstance(claude_output, dict):
611-
# Only accept Claude Code wrapper with result field
612-
# Direct format without wrapper is not supported
613-
if 'result' in claude_output:
614-
result_text = claude_output['result']
615-
if isinstance(result_text, str):
616-
# Try to extract JSON from the result text
617-
success, result_json = parse_json_with_fallbacks(result_text, "Claude result text")
618-
if success and result_json and 'findings' in result_json and 'pr_summary' in result_json:
619-
return result_json
620-
621-
# Return empty structure if no findings found
622-
return {
623-
'findings': [],
624-
'pr_summary': {}
625-
}
616+
if not isinstance(claude_output, dict):
617+
raise ValueError("Claude did not return a JSON object")
618+
619+
structured_output = claude_output.get('structured_output')
620+
if (
621+
isinstance(structured_output, dict)
622+
and 'findings' in structured_output
623+
and 'pr_summary' in structured_output
624+
):
625+
return structured_output
626+
627+
# Fall back to the older wrapper shape where the JSON payload
628+
# is serialized into the result field.
629+
result_text = claude_output.get('result')
630+
if isinstance(result_text, str):
631+
success, result_json = parse_json_with_fallbacks(result_text, "Claude result text")
632+
if success and result_json and 'findings' in result_json and 'pr_summary' in result_json:
633+
return result_json
634+
635+
raise ValueError(
636+
"Claude did not return a valid review payload in structured_output or result"
637+
)
626638

627639
def validate_claude_available(self) -> Tuple[bool, str]:
628640
"""Validate that Claude Code is available."""

claudecode/test_claude_runner.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from unittest.mock import Mock, patch
1010
from pathlib import Path
1111

12+
import pytest
13+
1214
from claudecode.github_action_audit import SimpleClaudeRunner
1315
from claudecode.constants import DEFAULT_CLAUDE_MODEL
1416

@@ -179,7 +181,14 @@ def test_run_code_review_large_prompt_warning(self, mock_run, capsys):
179181
"""Test warning for large prompts."""
180182
mock_run.return_value = Mock(
181183
returncode=0,
182-
stdout='{"findings": []}',
184+
stdout=json.dumps({
185+
"type": "result",
186+
"subtype": "success",
187+
"structured_output": {
188+
"pr_summary": {"overview": "Large prompt test", "file_changes": []},
189+
"findings": [],
190+
},
191+
}),
183192
stderr=''
184193
)
185194

@@ -203,7 +212,18 @@ def test_run_code_review_retry_on_failure(self, mock_run):
203212
# First call fails, second succeeds
204213
mock_run.side_effect = [
205214
Mock(returncode=1, stdout='', stderr='Temporary error'),
206-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
215+
Mock(
216+
returncode=0,
217+
stdout=json.dumps({
218+
"type": "result",
219+
"subtype": "success",
220+
"structured_output": {
221+
"pr_summary": {"overview": "Retry test", "file_changes": []},
222+
"findings": [],
223+
},
224+
}),
225+
stderr='',
226+
)
207227
]
208228

209229
runner = SimpleClaudeRunner()
@@ -304,6 +324,24 @@ def test_extract_review_findings_claude_wrapper(self):
304324
result = runner._extract_review_findings(claude_output)
305325
assert len(result['findings']) == 1
306326
assert result['findings'][0]['file'] == 'test.py'
327+
328+
def test_extract_review_findings_structured_output_wrapper(self):
329+
"""Test extraction from structured_output when result is empty."""
330+
runner = SimpleClaudeRunner()
331+
332+
claude_output = {
333+
"result": "",
334+
"structured_output": {
335+
"pr_summary": {"overview": "Test", "file_changes": []},
336+
"findings": [
337+
{"file": "test.py", "line": 10, "severity": "HIGH"}
338+
],
339+
},
340+
}
341+
342+
result = runner._extract_review_findings(claude_output)
343+
assert len(result["findings"]) == 1
344+
assert result["pr_summary"]["overview"] == "Test"
307345

308346
def test_extract_review_findings_direct_format(self):
309347
"""Test that direct findings format was removed - only wrapped format is supported."""
@@ -320,10 +358,8 @@ def test_extract_review_findings_direct_format(self):
320358
}
321359
}
322360

323-
result = runner._extract_review_findings(claude_output)
324-
# Should return empty structure since direct format is not supported
325-
assert len(result['findings']) == 0
326-
assert result['pr_summary'] == {}
361+
with pytest.raises(ValueError, match="valid review payload"):
362+
runner._extract_review_findings(claude_output)
327363

328364
def test_extract_review_findings_text_fallback(self):
329365
"""Test that text fallback was removed - only JSON is supported."""
@@ -334,20 +370,16 @@ def test_extract_review_findings_text_fallback(self):
334370
"result": "Found SQL injection vulnerability in database.py line 45"
335371
}
336372

337-
# Should return empty findings since we don't parse text anymore
338-
result = runner._extract_review_findings(claude_output)
339-
assert len(result['findings']) == 0
340-
assert result['pr_summary'] == {}
373+
with pytest.raises(ValueError, match="valid review payload"):
374+
runner._extract_review_findings(claude_output)
341375

342376
def test_extract_review_findings_empty(self):
343377
"""Test extraction with no findings."""
344378
runner = SimpleClaudeRunner()
345379

346-
# Various empty formats
347380
for output in [None, {}, {"result": ""}, {"other": "data"}]:
348-
result = runner._extract_review_findings(output)
349-
assert result['findings'] == []
350-
assert result['pr_summary'] == {}
381+
with pytest.raises(ValueError):
382+
runner._extract_review_findings(output)
351383

352384
def test_create_findings_from_text(self):
353385
"""Test that _create_findings_from_text was removed."""

claudecode/test_github_action_audit.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
Pytest tests for GitHub Action audit script components.
44
"""
55

6+
import pytest
7+
68

79
class TestImports:
810
"""Test that all required modules can be imported."""
@@ -455,8 +457,35 @@ def test_extract_findings_with_pr_summary(self):
455457
assert result['pr_summary']['file_changes'][0]['files'] == ['src/auth.py']
456458
assert len(result['findings']) == 1
457459

460+
def test_extract_findings_from_structured_output(self):
461+
"""Test that structured_output is accepted when result is empty."""
462+
from claudecode.github_action_audit import SimpleClaudeRunner
463+
464+
runner = SimpleClaudeRunner()
465+
466+
claude_output = {
467+
'result': '',
468+
'structured_output': {
469+
'findings': [
470+
{'file': 'test.py', 'line': 1, 'severity': 'HIGH', 'description': 'Test finding'}
471+
],
472+
'pr_summary': {
473+
'overview': 'This PR adds authentication middleware.',
474+
'file_changes': [
475+
{'label': 'src/auth.py', 'files': ['src/auth.py'], 'changes': 'Added JWT validation'}
476+
]
477+
}
478+
}
479+
}
480+
481+
result = runner._extract_review_findings(claude_output)
482+
483+
assert 'pr_summary' in result
484+
assert result['pr_summary']['overview'] == 'This PR adds authentication middleware.'
485+
assert len(result['findings']) == 1
486+
458487
def test_extract_findings_without_pr_summary(self):
459-
"""Test that missing pr_summary defaults to empty dict."""
488+
"""Test that missing pr_summary fails instead of degrading silently."""
460489
import json
461490
from claudecode.github_action_audit import SimpleClaudeRunner
462491

@@ -468,22 +497,17 @@ def test_extract_findings_without_pr_summary(self):
468497
})
469498
}
470499

471-
result = runner._extract_review_findings(claude_output)
472-
473-
assert 'pr_summary' in result
474-
assert result['pr_summary'] == {}
500+
with pytest.raises(ValueError, match="valid review payload"):
501+
runner._extract_review_findings(claude_output)
475502

476503
def test_extract_findings_empty_result(self):
477504
"""Test extraction with invalid/empty Claude output."""
478505
from claudecode.github_action_audit import SimpleClaudeRunner
479506

480507
runner = SimpleClaudeRunner()
481508

482-
result = runner._extract_review_findings({})
483-
484-
assert 'pr_summary' in result
485-
assert result['pr_summary'] == {}
486-
assert result['findings'] == []
509+
with pytest.raises(ValueError, match="valid review payload"):
510+
runner._extract_review_findings({})
487511

488512
def test_extract_findings_with_grouped_files(self):
489513
"""Test that grouped file_changes are handled correctly."""

claudecode/test_workflow_integration.py

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
from claudecode.github_action_audit import main
1414

1515

16+
def claude_success_output(payload):
17+
"""Build a Claude CLI success wrapper with structured output."""
18+
return json.dumps({
19+
"type": "result",
20+
"subtype": "success",
21+
"structured_output": payload,
22+
})
23+
24+
1625
class TestFullWorkflowIntegration:
1726
"""Test complete workflow scenarios."""
1827

@@ -345,8 +354,28 @@ def test_workflow_with_llm_filtering(self, mock_get, mock_run):
345354

346355
mock_run.side_effect = [
347356
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
348-
Mock(returncode=0, stdout=json.dumps({"findings": claude_findings}), stderr=''),
349-
Mock(returncode=0, stdout=json.dumps({"findings": []}), stderr='')
357+
Mock(
358+
returncode=0,
359+
stdout=claude_success_output({
360+
"pr_summary": {
361+
"overview": "Dependency update review.",
362+
"file_changes": [],
363+
},
364+
"findings": claude_findings,
365+
}),
366+
stderr='',
367+
),
368+
Mock(
369+
returncode=0,
370+
stdout=claude_success_output({
371+
"pr_summary": {
372+
"overview": "Dependency update review.",
373+
"file_changes": [],
374+
},
375+
"findings": [],
376+
}),
377+
stderr='',
378+
)
350379
]
351380

352381
with tempfile.TemporaryDirectory() as tmpdir:
@@ -423,8 +452,30 @@ def test_workflow_with_no_review_issues(self, mock_get, mock_run):
423452
# Claude finds no issues
424453
mock_run.side_effect = [
425454
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
426-
Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr=''),
427-
Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr='')
455+
Mock(
456+
returncode=0,
457+
stdout=claude_success_output({
458+
"pr_summary": {
459+
"overview": "README-only update.",
460+
"file_changes": [],
461+
},
462+
"findings": [],
463+
"analysis_summary": {"review_completed": True},
464+
}),
465+
stderr='',
466+
),
467+
Mock(
468+
returncode=0,
469+
stdout=claude_success_output({
470+
"pr_summary": {
471+
"overview": "README-only update.",
472+
"file_changes": [],
473+
},
474+
"findings": [],
475+
"analysis_summary": {"review_completed": True},
476+
}),
477+
stderr='',
478+
)
428479
]
429480

430481
with tempfile.TemporaryDirectory() as tmpdir:
@@ -517,8 +568,28 @@ def test_workflow_with_massive_pr(self, mock_get, mock_run):
517568
# Claude handles it gracefully
518569
mock_run.side_effect = [
519570
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
520-
Mock(returncode=0, stdout='{"findings": []}', stderr=''),
521-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
571+
Mock(
572+
returncode=0,
573+
stdout=claude_success_output({
574+
"pr_summary": {
575+
"overview": "Large refactor review.",
576+
"file_changes": [],
577+
},
578+
"findings": [],
579+
}),
580+
stderr='',
581+
),
582+
Mock(
583+
returncode=0,
584+
stdout=claude_success_output({
585+
"pr_summary": {
586+
"overview": "Large refactor review.",
587+
"file_changes": [],
588+
},
589+
"findings": [],
590+
}),
591+
stderr='',
592+
)
522593
]
523594

524595
with tempfile.TemporaryDirectory() as tmpdir:
@@ -597,8 +668,28 @@ def test_workflow_with_binary_files(self, mock_get, mock_run):
597668

598669
mock_run.side_effect = [
599670
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
600-
Mock(returncode=0, stdout='{"findings": []}', stderr=''),
601-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
671+
Mock(
672+
returncode=0,
673+
stdout=claude_success_output({
674+
"pr_summary": {
675+
"overview": "Binary file review.",
676+
"file_changes": [],
677+
},
678+
"findings": [],
679+
}),
680+
stderr='',
681+
),
682+
Mock(
683+
returncode=0,
684+
stdout=claude_success_output({
685+
"pr_summary": {
686+
"overview": "Binary file review.",
687+
"file_changes": [],
688+
},
689+
"findings": [],
690+
}),
691+
stderr='',
692+
)
602693
]
603694

604695
with tempfile.TemporaryDirectory() as tmpdir:

0 commit comments

Comments (0)