Skip to content

Commit 93563fc

Browse files
authored
Handle structured output review payloads (#33)
This fixes a failure mode in the review parser. Right now we only parse Claude's serialized `result` field. In recent runs, Claude sometimes returned the valid review payload in `structured_output` while `result` was empty, which made the action drop a good response and degrade into an empty review. This patch checks `structured_output` first, falls back to `result` for compatibility, and fails the run if neither contains a valid review payload. That follows the same direction as Anthropic's `claude-code-action`, which treats `structured_output` as the schema-backed output path: https://github.com/anthropics/claude-code-action/blob/main/base-action/src/run-claude-sdk.ts#L208-L228. Tests are updated to cover the `structured_output` path and the failure case.
1 parent d0af9a4 commit 93563fc

File tree

4 files changed

+208
-49
lines changed

4 files changed

+208
-49
lines changed

claudecode/github_action_audit.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,13 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[
577577

578578
# If returncode is 0, extract review findings
579579
if result.returncode == 0:
580-
parsed_results = self._extract_review_findings(parsed_result)
580+
try:
581+
parsed_results = self._extract_review_findings(parsed_result)
582+
except ValueError as error:
583+
if attempt == NUM_RETRIES - 1:
584+
return False, str(error), {}
585+
time.sleep(5 * attempt)
586+
continue
581587
return True, "", parsed_results
582588

583589
# Handle non-zero return codes after parsing
@@ -607,22 +613,28 @@ def run_code_review(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[
607613

608614
def _extract_review_findings(self, claude_output: Any) -> Dict[str, Any]:
609615
"""Extract review findings and PR summary from Claude's JSON response."""
610-
if isinstance(claude_output, dict):
611-
# Only accept Claude Code wrapper with result field
612-
# Direct format without wrapper is not supported
613-
if 'result' in claude_output:
614-
result_text = claude_output['result']
615-
if isinstance(result_text, str):
616-
# Try to extract JSON from the result text
617-
success, result_json = parse_json_with_fallbacks(result_text, "Claude result text")
618-
if success and result_json and 'findings' in result_json and 'pr_summary' in result_json:
619-
return result_json
620-
621-
# Return empty structure if no findings found
622-
return {
623-
'findings': [],
624-
'pr_summary': {}
625-
}
616+
if not isinstance(claude_output, dict):
617+
raise ValueError("Claude did not return a JSON object")
618+
619+
structured_output = claude_output.get('structured_output')
620+
if (
621+
isinstance(structured_output, dict)
622+
and 'findings' in structured_output
623+
and 'pr_summary' in structured_output
624+
):
625+
return structured_output
626+
627+
# Fall back to the older wrapper shape where the JSON payload
628+
# is serialized into the result field.
629+
result_text = claude_output.get('result')
630+
if isinstance(result_text, str):
631+
success, result_json = parse_json_with_fallbacks(result_text, "Claude result text")
632+
if success and result_json and 'findings' in result_json and 'pr_summary' in result_json:
633+
return result_json
634+
635+
raise ValueError(
636+
"Claude did not return a valid review payload in structured_output or result"
637+
)
626638

627639
def validate_claude_available(self) -> Tuple[bool, str]:
628640
"""Validate that Claude Code is available."""

claudecode/test_claude_runner.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from unittest.mock import Mock, patch
1010
from pathlib import Path
1111

12+
import pytest
13+
1214
from claudecode.github_action_audit import SimpleClaudeRunner
1315
from claudecode.constants import DEFAULT_CLAUDE_MODEL
1416

@@ -179,7 +181,14 @@ def test_run_code_review_large_prompt_warning(self, mock_run, capsys):
179181
"""Test warning for large prompts."""
180182
mock_run.return_value = Mock(
181183
returncode=0,
182-
stdout='{"findings": []}',
184+
stdout=json.dumps({
185+
"type": "result",
186+
"subtype": "success",
187+
"structured_output": {
188+
"pr_summary": {"overview": "Large prompt test", "file_changes": []},
189+
"findings": [],
190+
},
191+
}),
183192
stderr=''
184193
)
185194

@@ -203,7 +212,18 @@ def test_run_code_review_retry_on_failure(self, mock_run):
203212
# First call fails, second succeeds
204213
mock_run.side_effect = [
205214
Mock(returncode=1, stdout='', stderr='Temporary error'),
206-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
215+
Mock(
216+
returncode=0,
217+
stdout=json.dumps({
218+
"type": "result",
219+
"subtype": "success",
220+
"structured_output": {
221+
"pr_summary": {"overview": "Retry test", "file_changes": []},
222+
"findings": [],
223+
},
224+
}),
225+
stderr='',
226+
)
207227
]
208228

209229
runner = SimpleClaudeRunner()
@@ -304,6 +324,24 @@ def test_extract_review_findings_claude_wrapper(self):
304324
result = runner._extract_review_findings(claude_output)
305325
assert len(result['findings']) == 1
306326
assert result['findings'][0]['file'] == 'test.py'
327+
328+
def test_extract_review_findings_structured_output_wrapper(self):
329+
"""Test extraction from structured_output when result is empty."""
330+
runner = SimpleClaudeRunner()
331+
332+
claude_output = {
333+
"result": "",
334+
"structured_output": {
335+
"pr_summary": {"overview": "Test", "file_changes": []},
336+
"findings": [
337+
{"file": "test.py", "line": 10, "severity": "HIGH"}
338+
],
339+
},
340+
}
341+
342+
result = runner._extract_review_findings(claude_output)
343+
assert len(result["findings"]) == 1
344+
assert result["pr_summary"]["overview"] == "Test"
307345

308346
def test_extract_review_findings_direct_format(self):
309347
"""Test that direct findings format was removed - only wrapped format is supported."""
@@ -320,10 +358,8 @@ def test_extract_review_findings_direct_format(self):
320358
}
321359
}
322360

323-
result = runner._extract_review_findings(claude_output)
324-
# Should return empty structure since direct format is not supported
325-
assert len(result['findings']) == 0
326-
assert result['pr_summary'] == {}
361+
with pytest.raises(ValueError, match="valid review payload"):
362+
runner._extract_review_findings(claude_output)
327363

328364
def test_extract_review_findings_text_fallback(self):
329365
"""Test that text fallback was removed - only JSON is supported."""
@@ -334,20 +370,16 @@ def test_extract_review_findings_text_fallback(self):
334370
"result": "Found SQL injection vulnerability in database.py line 45"
335371
}
336372

337-
# Should return empty findings since we don't parse text anymore
338-
result = runner._extract_review_findings(claude_output)
339-
assert len(result['findings']) == 0
340-
assert result['pr_summary'] == {}
373+
with pytest.raises(ValueError, match="valid review payload"):
374+
runner._extract_review_findings(claude_output)
341375

342376
def test_extract_review_findings_empty(self):
343377
"""Test extraction with no findings."""
344378
runner = SimpleClaudeRunner()
345379

346-
# Various empty formats
347380
for output in [None, {}, {"result": ""}, {"other": "data"}]:
348-
result = runner._extract_review_findings(output)
349-
assert result['findings'] == []
350-
assert result['pr_summary'] == {}
381+
with pytest.raises(ValueError):
382+
runner._extract_review_findings(output)
351383

352384
def test_create_findings_from_text(self):
353385
"""Test that _create_findings_from_text was removed."""

claudecode/test_github_action_audit.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
Pytest tests for GitHub Action audit script components.
44
"""
55

6+
import pytest
7+
68

79
class TestImports:
810
"""Test that all required modules can be imported."""
@@ -455,8 +457,35 @@ def test_extract_findings_with_pr_summary(self):
455457
assert result['pr_summary']['file_changes'][0]['files'] == ['src/auth.py']
456458
assert len(result['findings']) == 1
457459

460+
def test_extract_findings_from_structured_output(self):
461+
"""Test that structured_output is accepted when result is empty."""
462+
from claudecode.github_action_audit import SimpleClaudeRunner
463+
464+
runner = SimpleClaudeRunner()
465+
466+
claude_output = {
467+
'result': '',
468+
'structured_output': {
469+
'findings': [
470+
{'file': 'test.py', 'line': 1, 'severity': 'HIGH', 'description': 'Test finding'}
471+
],
472+
'pr_summary': {
473+
'overview': 'This PR adds authentication middleware.',
474+
'file_changes': [
475+
{'label': 'src/auth.py', 'files': ['src/auth.py'], 'changes': 'Added JWT validation'}
476+
]
477+
}
478+
}
479+
}
480+
481+
result = runner._extract_review_findings(claude_output)
482+
483+
assert 'pr_summary' in result
484+
assert result['pr_summary']['overview'] == 'This PR adds authentication middleware.'
485+
assert len(result['findings']) == 1
486+
458487
def test_extract_findings_without_pr_summary(self):
459-
"""Test that missing pr_summary defaults to empty dict."""
488+
"""Test that missing pr_summary fails instead of degrading silently."""
460489
import json
461490
from claudecode.github_action_audit import SimpleClaudeRunner
462491

@@ -468,22 +497,17 @@ def test_extract_findings_without_pr_summary(self):
468497
})
469498
}
470499

471-
result = runner._extract_review_findings(claude_output)
472-
473-
assert 'pr_summary' in result
474-
assert result['pr_summary'] == {}
500+
with pytest.raises(ValueError, match="valid review payload"):
501+
runner._extract_review_findings(claude_output)
475502

476503
def test_extract_findings_empty_result(self):
477504
"""Test extraction with invalid/empty Claude output."""
478505
from claudecode.github_action_audit import SimpleClaudeRunner
479506

480507
runner = SimpleClaudeRunner()
481508

482-
result = runner._extract_review_findings({})
483-
484-
assert 'pr_summary' in result
485-
assert result['pr_summary'] == {}
486-
assert result['findings'] == []
509+
with pytest.raises(ValueError, match="valid review payload"):
510+
runner._extract_review_findings({})
487511

488512
def test_extract_findings_with_grouped_files(self):
489513
"""Test that grouped file_changes are handled correctly."""

claudecode/test_workflow_integration.py

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
from claudecode.github_action_audit import main
1414

1515

16+
def claude_success_output(payload):
17+
"""Build a Claude CLI success wrapper with structured output."""
18+
return json.dumps({
19+
"type": "result",
20+
"subtype": "success",
21+
"structured_output": payload,
22+
})
23+
24+
1625
class TestFullWorkflowIntegration:
1726
"""Test complete workflow scenarios."""
1827

@@ -345,8 +354,28 @@ def test_workflow_with_llm_filtering(self, mock_get, mock_run):
345354

346355
mock_run.side_effect = [
347356
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
348-
Mock(returncode=0, stdout=json.dumps({"findings": claude_findings}), stderr=''),
349-
Mock(returncode=0, stdout=json.dumps({"findings": []}), stderr='')
357+
Mock(
358+
returncode=0,
359+
stdout=claude_success_output({
360+
"pr_summary": {
361+
"overview": "Dependency update review.",
362+
"file_changes": [],
363+
},
364+
"findings": claude_findings,
365+
}),
366+
stderr='',
367+
),
368+
Mock(
369+
returncode=0,
370+
stdout=claude_success_output({
371+
"pr_summary": {
372+
"overview": "Dependency update review.",
373+
"file_changes": [],
374+
},
375+
"findings": [],
376+
}),
377+
stderr='',
378+
)
350379
]
351380

352381
with tempfile.TemporaryDirectory() as tmpdir:
@@ -423,8 +452,30 @@ def test_workflow_with_no_review_issues(self, mock_get, mock_run):
423452
# Claude finds no issues
424453
mock_run.side_effect = [
425454
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
426-
Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr=''),
427-
Mock(returncode=0, stdout='{"findings": [], "analysis_summary": {"review_completed": true}}', stderr='')
455+
Mock(
456+
returncode=0,
457+
stdout=claude_success_output({
458+
"pr_summary": {
459+
"overview": "README-only update.",
460+
"file_changes": [],
461+
},
462+
"findings": [],
463+
"analysis_summary": {"review_completed": True},
464+
}),
465+
stderr='',
466+
),
467+
Mock(
468+
returncode=0,
469+
stdout=claude_success_output({
470+
"pr_summary": {
471+
"overview": "README-only update.",
472+
"file_changes": [],
473+
},
474+
"findings": [],
475+
"analysis_summary": {"review_completed": True},
476+
}),
477+
stderr='',
478+
)
428479
]
429480

430481
with tempfile.TemporaryDirectory() as tmpdir:
@@ -517,8 +568,28 @@ def test_workflow_with_massive_pr(self, mock_get, mock_run):
517568
# Claude handles it gracefully
518569
mock_run.side_effect = [
519570
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
520-
Mock(returncode=0, stdout='{"findings": []}', stderr=''),
521-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
571+
Mock(
572+
returncode=0,
573+
stdout=claude_success_output({
574+
"pr_summary": {
575+
"overview": "Large refactor review.",
576+
"file_changes": [],
577+
},
578+
"findings": [],
579+
}),
580+
stderr='',
581+
),
582+
Mock(
583+
returncode=0,
584+
stdout=claude_success_output({
585+
"pr_summary": {
586+
"overview": "Large refactor review.",
587+
"file_changes": [],
588+
},
589+
"findings": [],
590+
}),
591+
stderr='',
592+
)
522593
]
523594

524595
with tempfile.TemporaryDirectory() as tmpdir:
@@ -597,8 +668,28 @@ def test_workflow_with_binary_files(self, mock_get, mock_run):
597668

598669
mock_run.side_effect = [
599670
Mock(returncode=0, stdout='claude version 1.0.0', stderr=''),
600-
Mock(returncode=0, stdout='{"findings": []}', stderr=''),
601-
Mock(returncode=0, stdout='{"findings": []}', stderr='')
671+
Mock(
672+
returncode=0,
673+
stdout=claude_success_output({
674+
"pr_summary": {
675+
"overview": "Binary file review.",
676+
"file_changes": [],
677+
},
678+
"findings": [],
679+
}),
680+
stderr='',
681+
),
682+
Mock(
683+
returncode=0,
684+
stdout=claude_success_output({
685+
"pr_summary": {
686+
"overview": "Binary file review.",
687+
"file_changes": [],
688+
},
689+
"findings": [],
690+
}),
691+
stderr='',
692+
)
602693
]
603694

604695
with tempfile.TemporaryDirectory() as tmpdir:

0 commit comments

Comments (0)