From c5891792a4ba1ed12a7fc0b7bbdd6a4af988bb87 Mon Sep 17 00:00:00 2001 From: Matej Bukovinski Date: Fri, 6 Feb 2026 17:25:23 +0100 Subject: [PATCH] Remove legacy prompt/security-audit APIs and unify tests --- claudecode/github_action_audit.py | 14 - claudecode/prompts.py | 433 ++-------------------- claudecode/test_claude_runner.py | 52 +-- claudecode/test_github_action_audit.py | 13 +- claudecode/test_prompts.py | 400 ++++++-------------- docs/custom-security-scan-instructions.md | 2 +- 6 files changed, 172 insertions(+), 742 deletions(-) diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py index 2299df0..25bb3fc 100644 --- a/claudecode/github_action_audit.py +++ b/claudecode/github_action_audit.py @@ -389,15 +389,6 @@ def _extract_review_findings(self, claude_output: Any) -> Dict[str, Any]: } } - # Backward-compatible alias - def run_security_audit(self, repo_dir: Path, prompt: str) -> Tuple[bool, str, Dict[str, Any]]: - return self.run_code_review(repo_dir, prompt) - - # Backward-compatible alias - def _extract_security_findings(self, claude_output: Any) -> Dict[str, Any]: - return self._extract_review_findings(claude_output) - - def validate_claude_available(self) -> Tuple[bool, str]: """Validate that Claude Code is available.""" try: @@ -540,11 +531,6 @@ def run_code_review(claude_runner: SimpleClaudeRunner, prompt: str) -> Dict[str, return results -def run_security_audit(claude_runner: SimpleClaudeRunner, prompt: str) -> Dict[str, Any]: - """Backward-compatible wrapper for previous security audit runner.""" - return run_code_review(claude_runner, prompt) - - def apply_findings_filter(findings_filter, original_findings: List[Dict[str, Any]], pr_context: Dict[str, Any], github_client: GitHubActionClient) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], Dict[str, Any]]: """Apply findings filter to reduce false positives. diff --git a/claudecode/prompts.py b/claudecode/prompts.py index 7c9cc75..54d15b6 100644 --- a/claudecode/prompts.py +++ b/claudecode/prompts.py @@ -1,6 +1,37 @@ """Code review prompt templates.""" +def _format_files_changed(pr_data): + """Format changed files for prompt context.""" + return "\n".join([f"- {f['filename']}" for f in pr_data['files']]) + + +def _build_diff_section(pr_diff, include_diff): + """Build prompt section for inline diff or agentic file reading.""" + if pr_diff and include_diff: + return f""" + +PR DIFF CONTENT: +``` +{pr_diff} +``` + +Review the complete diff above. This contains all code changes in the PR. +""" + + return """ + +IMPORTANT - FILE READING INSTRUCTIONS: +You have access to the repository files. For each file listed above, use the Read tool to examine the changes. +Focus on the files that are most likely to contain issues based on the PR context. + +To review effectively: +1. Read each modified file to understand the current code +2. Look at surrounding code context when needed to understand the changes +3. Check related files if you need to understand dependencies or usage patterns +""" + + def get_unified_review_prompt( pr_data, pr_diff=None, @@ -24,31 +55,8 @@ def get_unified_review_prompt( Formatted prompt string """ - files_changed = "\n".join([f"- {f['filename']}" for f in pr_data['files']]) - - diff_section = "" - if pr_diff and include_diff: - diff_section = f""" - -PR DIFF CONTENT: -``` -{pr_diff} -``` - -Review the complete diff above. This contains all code changes in the PR. -""" - else: - diff_section = """ - -IMPORTANT - FILE READING INSTRUCTIONS: -You have access to the repository files. For each file listed above, use the Read tool to examine the changes. -Focus on the files that are most likely to contain issues based on the PR context. - -To review effectively: -1. Read each modified file to understand the current code -2. Look at surrounding code context when needed to understand the changes -3. Check related files if you need to understand dependencies or usage patterns -""" + files_changed = _format_files_changed(pr_data) + diff_section = _build_diff_section(pr_diff, include_diff) custom_review_section = "" if custom_review_instructions: @@ -226,378 +234,3 @@ def get_unified_review_prompt( Your final reply must contain the JSON and nothing else. You should not reply again after outputting the JSON. """ - - -def get_code_review_prompt( - pr_data, - pr_diff=None, - include_diff=True, - custom_review_instructions=None, -): - """Generate code review prompt for Claude Code. - - Note: This prompt focuses on code quality (correctness, reliability, performance, - maintainability, testing). Security review is handled separately by get_security_review_prompt. - - Args: - pr_data: PR data dictionary from GitHub API - pr_diff: Optional complete PR diff in unified format - include_diff: Whether to include the diff in the prompt (default: True) - custom_review_instructions: Optional custom review instructions to append - - Returns: - Formatted prompt string - """ - - files_changed = "\n".join([f"- {f['filename']}" for f in pr_data['files']]) - - # Add diff section if provided and include_diff is True - diff_section = "" - if pr_diff and include_diff: - diff_section = f""" - -PR DIFF CONTENT: -``` -{pr_diff} -``` - -Review the complete diff above. This contains all code changes in the PR. -""" - else: - diff_section = """ - -IMPORTANT - FILE READING INSTRUCTIONS: -You have access to the repository files. For each file listed above, use the Read tool to examine the changes. -Focus on the files that are most likely to contain issues based on the PR context. - -To review effectively: -1. Read each modified file to understand the current code -2. Look at surrounding code context when needed to understand the changes -3. Check related files if you need to understand dependencies or usage patterns -""" - - # Add custom instructions if provided - custom_review_section = "" - if custom_review_instructions: - custom_review_section = f"\n{custom_review_instructions}\n" - - return f""" -You are a senior engineer conducting a high-signal code review of GitHub PR #{pr_data['number']}: "{pr_data['title']}" - -CONTEXT: -- Repository: {pr_data.get('head', {}).get('repo', {}).get('full_name', 'unknown')} -- Author: {pr_data['user']} -- Files changed: {pr_data['changed_files']} -- Lines added: {pr_data['additions']} -- Lines deleted: {pr_data['deletions']} - -Files modified: -{files_changed}{diff_section} - -OBJECTIVE: -Perform a focused, high-signal code review to identify HIGH-CONFIDENCE issues introduced by this PR. This covers correctness, reliability, performance, maintainability, and testing. Security issues are reviewed separately. Do not comment on pre-existing issues or purely stylistic preferences. - -CRITICAL INSTRUCTIONS: -1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident they are real and impactful -2. AVOID NOISE: Skip style nits, subjective preferences, or low-impact suggestions -3. FOCUS ON IMPACT: Prioritize bugs, regressions, data loss, or significant performance problems -4. SCOPE: Only evaluate code introduced or modified in this PR. Ignore unrelated existing issues -5. NO SECURITY: Do not flag security issues - those are handled by a dedicated security review - -REVIEW CATEGORIES TO EXAMINE: - -**Correctness & Logic:** -- Incorrect business logic or wrong results -- Edge cases or null/empty handling regressions -- Incorrect error handling or missing validations leading to bad state -- Invariants broken by changes - -**Reliability & Resilience:** -- Concurrency or race conditions introduced by changes -- Resource leaks, timeouts, or missing retries in critical paths -- Partial failure handling or inconsistent state updates -- Idempotency or ordering issues - -**Performance & Scalability:** -- Algorithmic regressions in hot paths (O(n^2) where O(n) expected) -- N+1 query patterns -- Excessive synchronous I/O in latency-sensitive code -- Unbounded memory growth introduced by changes - -**Maintainability & Design:** -- Changes that significantly increase complexity or make future changes risky -- Tight coupling or unclear responsibility boundaries introduced -- Misleading APIs or brittle contracts - -**Testing & Observability:** -- Missing tests for high-risk changes -- Lack of logging/metrics around new critical behavior -- Flaky behavior due to nondeterministic changes -{custom_review_section} -ANALYSIS METHODOLOGY: - -Phase 1 - Repository Context Research (Use file search tools): -- Identify existing patterns, conventions, and critical paths -- Understand data flow, invariants, and error handling expectations -- Note existing testing and observability practices - -Phase 2 - Comparative Analysis: -- Compare new changes to existing patterns and contracts -- Identify deviations that introduce risk or regressions -- Look for inconsistent handling between similar code paths - -Phase 3 - Issue Assessment: -- Examine each modified file for correctness, reliability, performance, maintainability, and testing implications -- Trace data flow to identify logic errors or state management risks -- Identify concurrency and resource management issues - -REQUIRED OUTPUT FORMAT: - -You MUST output your findings as structured JSON with this exact schema: - -{{ - "findings": [ - {{ - "file": "path/to/file.py", - "line": 42, - "severity": "HIGH", - "category": "correctness|reliability|performance|maintainability|testing", - "title": "Short summary of the issue", - "description": "What is wrong and where it happens", - "impact": "Concrete impact or failure mode", - "recommendation": "Actionable fix or mitigation", - "suggestion": "Exact replacement code (optional). Can be multi-line. Must replace lines from suggestion_start_line to suggestion_end_line.", - "suggestion_start_line": 42, - "suggestion_end_line": 44, - "confidence": 0.95 - }} - ], - "analysis_summary": {{ - "files_reviewed": 8, - "high_severity": 1, - "medium_severity": 0, - "low_severity": 0, - "review_completed": true - }} -}} - -SUGGESTION GUIDELINES: -- Only include `suggestion` if you can provide exact, working replacement code -- For single-line fixes: set suggestion_start_line = suggestion_end_line = the line number -- For multi-line fixes: set the range of lines being replaced -- The suggestion replaces all lines from suggestion_start_line to suggestion_end_line (inclusive) - -SEVERITY GUIDELINES: -- **HIGH**: Likely production bug, data loss, or significant regression -- **MEDIUM**: Real issue with limited scope or specific triggering conditions -- **LOW**: Minor but real issue; use sparingly and only if clearly actionable - -CONFIDENCE SCORING: -- 0.9-1.0: Certain issue with clear evidence and impact -- 0.8-0.9: Strong signal with likely real-world impact -- 0.7-0.8: Plausible issue but may require specific conditions -- Below 0.7: Don't report (too speculative) - -FINAL REMINDER: -Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a senior engineer would confidently raise in a PR review. Do NOT report security issues - those are handled separately. - -Begin your analysis now. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for code quality implications. - -Your final reply must contain the JSON and nothing else. You should not reply again after outputting the JSON. -""" - - -def get_security_review_prompt( - pr_data, - pr_diff=None, - include_diff=True, - custom_security_instructions=None, -): - """Generate security-focused review prompt for Claude Code.""" - - files_changed = "\n".join([f"- {f['filename']}" for f in pr_data['files']]) - - diff_section = "" - if pr_diff and include_diff: - diff_section = f""" - -PR DIFF CONTENT: -``` -{pr_diff} -``` - -Review the complete diff above. This contains all code changes in the PR. -""" - else: - diff_section = """ - -IMPORTANT - FILE READING INSTRUCTIONS: -You have access to the repository files. For each file listed above, use the Read tool to examine the changes. -Focus on the files that are most likely to contain issues based on the PR context. - -To review effectively: -1. Read each modified file to understand the current code -2. Look at surrounding code context when needed to understand the changes -3. Check related files if you need to understand dependencies or usage patterns -""" - - custom_security_section = "" - if custom_security_instructions: - custom_security_section = f"\n{custom_security_instructions}\n" - - return f""" -You are a senior security engineer conducting a focused security review of GitHub PR #{pr_data['number']}: "{pr_data['title']}" - -CONTEXT: -- Repository: {pr_data.get('head', {}).get('repo', {}).get('full_name', 'unknown')} -- Author: {pr_data['user']} -- Files changed: {pr_data['changed_files']} -- Lines added: {pr_data['additions']} -- Lines deleted: {pr_data['deletions']} - -Files modified: -{files_changed}{diff_section} - -OBJECTIVE: -Perform a security-focused code review to identify HIGH-CONFIDENCE security vulnerabilities newly introduced by this PR. Do not comment on pre-existing issues or general code quality. - -CRITICAL INSTRUCTIONS: -1. MINIMIZE FALSE POSITIVES: Only flag issues where you're >80% confident of actual exploitability -2. AVOID NOISE: Skip theoretical issues, style concerns, or low-impact findings -3. FOCUS ON IMPACT: Prioritize vulnerabilities that could lead to unauthorized access, data breaches, or system compromise -4. EXCLUSIONS: Do NOT report the following issue types: - - Denial of Service (DOS) vulnerabilities, even if they allow service disruption - - Secrets or sensitive data stored on disk (these are handled by other processes) - - Rate limiting or resource exhaustion issues - -SECURITY CATEGORIES TO EXAMINE: - -**Input Validation Vulnerabilities:** -- SQL injection via unsanitized user input -- Command injection in system calls or subprocesses -- XXE injection in XML parsing -- Template injection in templating engines -- NoSQL injection in database queries -- Path traversal in file operations - -**Authentication & Authorization Issues:** -- Authentication bypass logic -- Privilege escalation paths -- Session management flaws -- JWT token vulnerabilities -- Authorization logic bypasses - -**Crypto & Secrets Management:** -- Hardcoded API keys, passwords, or tokens -- Weak cryptographic algorithms or implementations -- Improper key storage or management -- Cryptographic randomness issues -- Certificate validation bypasses - -**Injection & Code Execution:** -- Remote code execution via deserialization -- Pickle injection in Python -- YAML deserialization vulnerabilities -- Eval injection in dynamic code execution -- XSS vulnerabilities in web applications (reflected, stored, DOM-based) - -**Data Exposure:** -- Sensitive data logging or storage -- PII handling violations -- API endpoint data leakage -- Debug information exposure -{custom_security_section} -Additional notes: -- Even if something is only exploitable from the local network, it can still be a HIGH severity issue - -ANALYSIS METHODOLOGY: - -Phase 1 - Repository Context Research (Use file search tools): -- Identify existing security frameworks and libraries in use -- Look for established secure coding patterns in the codebase -- Examine existing sanitization and validation patterns -- Understand the project's security model and threat model - -Phase 2 - Comparative Analysis: -- Compare new code changes against existing security patterns -- Identify deviations from established secure practices -- Look for inconsistent security implementations -- Flag code that introduces new attack surfaces - -Phase 3 - Vulnerability Assessment: -- Examine each modified file for security implications -- Trace data flow from user inputs to sensitive operations -- Look for privilege boundaries being crossed unsafely -- Identify injection points and unsafe deserialization - -REQUIRED OUTPUT FORMAT: - -You MUST output your findings as structured JSON with this exact schema: - -{{ - "findings": [ - {{ - "file": "path/to/file.py", - "line": 42, - "severity": "HIGH", - "category": "security", - "title": "Short summary of the issue", - "description": "What is wrong and where it happens", - "impact": "Exploit scenario or concrete impact", - "recommendation": "Actionable fix or mitigation", - "suggestion": "Exact replacement code (optional). Can be multi-line. Must replace lines from suggestion_start_line to suggestion_end_line.", - "suggestion_start_line": 42, - "suggestion_end_line": 44, - "confidence": 0.95 - }} - ], - "analysis_summary": {{ - "files_reviewed": 8, - "high_severity": 1, - "medium_severity": 0, - "low_severity": 0, - "review_completed": true - }} -}} - -SUGGESTION GUIDELINES: -- Only include `suggestion` if you can provide exact, working replacement code -- For single-line fixes: set suggestion_start_line = suggestion_end_line = the line number -- For multi-line fixes: set the range of lines being replaced -- The suggestion replaces all lines from suggestion_start_line to suggestion_end_line (inclusive) - -SEVERITY GUIDELINES: -- **HIGH**: Directly exploitable vulnerabilities leading to RCE, data breach, or authentication bypass -- **MEDIUM**: Vulnerabilities requiring specific conditions but with significant impact -- **LOW**: Defense-in-depth issues or lower-impact vulnerabilities - -CONFIDENCE SCORING: -- 0.9-1.0: Certain exploit path identified, tested if possible -- 0.8-0.9: Clear vulnerability pattern with known exploitation methods -- 0.7-0.8: Suspicious pattern requiring specific conditions to exploit -- Below 0.7: Don't report (too speculative) - -FINAL REMINDER: -Focus on HIGH and MEDIUM findings only. Better to miss some theoretical issues than flood the report with false positives. Each finding should be something a security engineer would confidently raise in a PR review. - -IMPORTANT EXCLUSIONS - DO NOT REPORT: -- Denial of Service (DOS) vulnerabilities or resource exhaustion attacks -- Secrets/credentials stored on disk (these are managed separately) -- Rate limiting concerns or service overload scenarios -- Memory consumption or CPU exhaustion issues -- Lack of input validation on non-security-critical fields without proven security impact - -Begin your analysis now. Use the repository exploration tools to understand the codebase context, then analyze the PR changes for security implications. - -Your final reply must contain the JSON and nothing else. You should not reply again after outputting the JSON. -""" - - -def get_security_audit_prompt(pr_data, pr_diff=None, include_diff=True, custom_scan_instructions=None): - """Backward-compatible wrapper for previous security-only prompt.""" - return get_security_review_prompt( - pr_data, - pr_diff=pr_diff, - include_diff=include_diff, - custom_security_instructions=custom_scan_instructions, - ) diff --git a/claudecode/test_claude_runner.py b/claudecode/test_claude_runner.py index fbfdf6c..1133dcf 100644 --- a/claudecode/test_claude_runner.py +++ b/claudecode/test_claude_runner.py @@ -104,10 +104,10 @@ def test_validate_claude_available_timeout(self, mock_run): assert success is False assert 'timed out' in error - def test_run_security_audit_missing_directory(self): + def test_run_code_review_missing_directory(self): """Test audit with missing directory.""" runner = SimpleClaudeRunner() - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/non/existent/path'), "test prompt" ) @@ -117,8 +117,8 @@ def test_run_security_audit_missing_directory(self): assert results == {} @patch('subprocess.run') - def test_run_security_audit_success(self, mock_run): - """Test successful security audit.""" + def test_run_code_review_success(self, mock_run): + """Test successful code review.""" # Claude Code returns wrapped format with 'result' field findings_data = { "findings": [ @@ -150,7 +150,7 @@ def test_run_security_audit_success(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test prompt" ) @@ -173,7 +173,7 @@ def test_run_security_audit_success(self, mock_run): assert call_args[1]['cwd'] == Path('/tmp/test') @patch('subprocess.run') - def test_run_security_audit_large_prompt_warning(self, mock_run, capsys): + def test_run_code_review_large_prompt_warning(self, mock_run, capsys): """Test warning for large prompts.""" mock_run.return_value = Mock( returncode=0, @@ -186,7 +186,7 @@ def test_run_security_audit_large_prompt_warning(self, mock_run, capsys): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), large_prompt ) @@ -196,7 +196,7 @@ def test_run_security_audit_large_prompt_warning(self, mock_run, capsys): assert success is True @patch('subprocess.run') - def test_run_security_audit_retry_on_failure(self, mock_run): + def test_run_code_review_retry_on_failure(self, mock_run): """Test retry logic on failure.""" # First call fails, second succeeds mock_run.side_effect = [ @@ -206,7 +206,7 @@ def test_run_security_audit_retry_on_failure(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test prompt" ) @@ -216,7 +216,7 @@ def test_run_security_audit_retry_on_failure(self, mock_run): assert mock_run.call_count == 2 # Retried once @patch('subprocess.run') - def test_run_security_audit_retry_on_error_during_execution(self, mock_run): + def test_run_code_review_retry_on_error_during_execution(self, mock_run): """Test retry on error_during_execution result.""" error_result = { "type": "result", @@ -244,7 +244,7 @@ def test_run_security_audit_retry_on_error_during_execution(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test prompt" ) @@ -254,13 +254,13 @@ def test_run_security_audit_retry_on_error_during_execution(self, mock_run): assert mock_run.call_count == 2 @patch('subprocess.run') - def test_run_security_audit_timeout(self, mock_run): + def test_run_code_review_timeout(self, mock_run): """Test timeout handling.""" mock_run.side_effect = subprocess.TimeoutExpired(['claude'], 1200) runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test prompt" ) @@ -270,7 +270,7 @@ def test_run_security_audit_timeout(self, mock_run): assert results == {} @patch('subprocess.run') - def test_run_security_audit_json_parse_failure_with_retry(self, mock_run): + def test_run_code_review_json_parse_failure_with_retry(self, mock_run): """Test JSON parse failure with retry.""" mock_run.side_effect = [ Mock(returncode=0, stdout='Invalid JSON', stderr=''), @@ -279,7 +279,7 @@ def test_run_security_audit_json_parse_failure_with_retry(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test prompt" ) @@ -288,7 +288,7 @@ def test_run_security_audit_json_parse_failure_with_retry(self, mock_run): assert 'Failed to parse Claude output' in error assert mock_run.call_count == 2 - def test_extract_security_findings_claude_wrapper(self): + def test_extract_review_findings_claude_wrapper(self): """Test extraction from Claude Code wrapper format.""" runner = SimpleClaudeRunner() @@ -301,11 +301,11 @@ def test_extract_security_findings_claude_wrapper(self): }) } - result = runner._extract_security_findings(claude_output) + result = runner._extract_review_findings(claude_output) assert len(result['findings']) == 1 assert result['findings'][0]['file'] == 'test.py' - def test_extract_security_findings_direct_format(self): + def test_extract_review_findings_direct_format(self): """Test that direct findings format was removed - only wrapped format is supported.""" runner = SimpleClaudeRunner() @@ -322,12 +322,12 @@ def test_extract_security_findings_direct_format(self): } } - result = runner._extract_security_findings(claude_output) + result = runner._extract_review_findings(claude_output) # Should return empty structure since direct format is not supported assert len(result['findings']) == 0 assert result['analysis_summary']['review_completed'] is False - def test_extract_security_findings_text_fallback(self): + def test_extract_review_findings_text_fallback(self): """Test that text fallback was removed - only JSON is supported.""" runner = SimpleClaudeRunner() @@ -337,17 +337,17 @@ def test_extract_security_findings_text_fallback(self): } # Should return empty findings since we don't parse text anymore - result = runner._extract_security_findings(claude_output) + result = runner._extract_review_findings(claude_output) assert len(result['findings']) == 0 assert result['analysis_summary']['review_completed'] is False - def test_extract_security_findings_empty(self): + def test_extract_review_findings_empty(self): """Test extraction with no findings.""" runner = SimpleClaudeRunner() # Various empty formats for output in [None, {}, {"result": ""}, {"other": "data"}]: - result = runner._extract_security_findings(output) + result = runner._extract_review_findings(output) assert result['findings'] == [] assert result['analysis_summary']['review_completed'] is False @@ -391,7 +391,7 @@ def test_claude_output_formats(self, mock_run): stderr='' ) - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test" ) @@ -414,7 +414,7 @@ def test_partial_json_recovery(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test" ) @@ -429,7 +429,7 @@ def test_exception_handling(self, mock_run): runner = SimpleClaudeRunner() with patch('pathlib.Path.exists', return_value=True): - success, error, results = runner.run_security_audit( + success, error, results = runner.run_code_review( Path('/tmp/test'), "test" ) diff --git a/claudecode/test_github_action_audit.py b/claudecode/test_github_action_audit.py index ab6401a..d4b21ad 100644 --- a/claudecode/test_github_action_audit.py +++ b/claudecode/test_github_action_audit.py @@ -17,12 +17,11 @@ def test_main_module_import(self): def test_component_imports(self): """Test that all component modules can be imported.""" - from claudecode.prompts import get_code_review_prompt, get_security_review_prompt + from claudecode.prompts import get_unified_review_prompt from claudecode.json_parser import parse_json_with_fallbacks, extract_json_from_text # Verify they're callable/usable - assert callable(get_code_review_prompt) - assert callable(get_security_review_prompt) + assert callable(get_unified_review_prompt) assert callable(parse_json_with_fallbacks) assert callable(extract_json_from_text) @@ -173,9 +172,9 @@ def test_extract_json_from_text_no_json(self): class TestPromptsModule: """Test the prompts module.""" - def test_get_code_review_prompt(self): - """Test security audit prompt generation.""" - from claudecode.prompts import get_code_review_prompt + def test_get_unified_review_prompt(self): + """Test unified review prompt generation.""" + from claudecode.prompts import get_unified_review_prompt pr_data = { 'number': 123, @@ -203,7 +202,7 @@ def test_get_code_review_prompt(self): pr_diff = "diff --git a/test.py b/test.py\n+added line" - prompt = get_code_review_prompt(pr_data, pr_diff) + prompt = get_unified_review_prompt(pr_data, pr_diff) assert isinstance(prompt, str) assert 'security' in prompt.lower() diff --git a/claudecode/test_prompts.py b/claudecode/test_prompts.py index e31efe6..40c815e 100644 --- a/claudecode/test_prompts.py +++ b/claudecode/test_prompts.py @@ -1,288 +1,92 @@ """Unit tests for the prompts module.""" -from claudecode.prompts import get_code_review_prompt, get_security_review_prompt +from claudecode.prompts import get_unified_review_prompt + + +def _sample_pr_data(): + return { + "number": 123, + "title": "Add new feature", + "body": "This PR adds a new feature to handle user input", + "user": "testuser", + "changed_files": 1, + "additions": 10, + "deletions": 5, + "head": { + "repo": { + "full_name": "owner/repo" + } + }, + "files": [ + { + "filename": "app.py", + "status": "modified", + "additions": 10, + "deletions": 5, + } + ], + } class TestPrompts: - """Test prompt generation functions.""" - - def test_get_code_review_prompt_basic(self): - """Test basic code review prompt generation.""" - pr_data = { - "number": 123, - "title": "Add new feature", - "body": "This PR adds a new feature to handle user input", - "user": "testuser", - "changed_files": 1, - "additions": 10, - "deletions": 5, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [ - { - "filename": "app.py", - "status": "modified", - "additions": 10, - "deletions": 5 - } - ] - } - + """Test unified prompt generation.""" + + def test_get_unified_review_prompt_basic(self): + pr_data = _sample_pr_data() + pr_diff = """ diff --git a/app.py b/app.py @@ -1,5 +1,10 @@ def process_input(user_input): - return user_input + # Process the input -+ result = eval(user_input) # Potential security issue ++ result = eval(user_input) + return result """ - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Check that prompt contains expected elements + + prompt = get_unified_review_prompt(pr_data, pr_diff) + assert isinstance(prompt, str) assert len(prompt) > 0 - assert "123" in prompt # PR number - assert "Add new feature" in prompt # PR title - assert "testuser" in prompt # Author - assert "app.py" in prompt # File name - assert "eval(user_input)" in prompt # The actual diff content + assert "123" in prompt + assert "Add new feature" in prompt + assert "testuser" in prompt + assert "app.py" in prompt + assert "eval(user_input)" in prompt + assert "code quality" in prompt.lower() + assert "security" in prompt.lower() - def test_get_security_review_prompt_basic(self): - """Test basic security review prompt generation.""" - pr_data = { - "number": 222, - "title": "Harden auth flow", - "body": "Tighten auth checks and validation", - "user": "security-team", - "changed_files": 1, - "additions": 12, - "deletions": 4, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [ - { - "filename": "auth.py", - "status": "modified", - "additions": 12, - "deletions": 4 - } - ] - } + def test_get_unified_review_prompt_without_diff_uses_file_reading_instructions(self): + pr_data = _sample_pr_data() - pr_diff = """ -diff --git a/auth.py b/auth.py -@@ -1,3 +1,7 @@ - def login(user, password): - return authenticate(user, password) -""" + prompt = get_unified_review_prompt(pr_data, pr_diff="diff --git a/a b/a", include_diff=False) - prompt = get_security_review_prompt(pr_data, pr_diff) + assert "PR DIFF CONTENT:" not in prompt + assert "IMPORTANT - FILE READING INSTRUCTIONS:" in prompt + + def test_get_unified_review_prompt_no_files(self): + pr_data = _sample_pr_data() + pr_data["changed_files"] = 0 + pr_data["files"] = [] + + prompt = get_unified_review_prompt(pr_data, pr_diff="") assert isinstance(prompt, str) - assert "security review" in prompt.lower() - assert "auth.py" in prompt - - def test_get_code_review_prompt_empty_body(self): - """Test prompt generation with empty PR body.""" - pr_data = { - "number": 456, - "title": "Quick fix", - "body": None, # Empty body - "user": "author", - "changed_files": 0, - "additions": 0, - "deletions": 0, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [] - } - - pr_diff = "diff --git a/test.js b/test.js" - - prompt = get_code_review_prompt(pr_data, pr_diff) - - assert isinstance(prompt, str) - assert "456" in prompt - assert "Quick fix" in prompt - assert "author" in prompt - - def test_get_code_review_prompt_multiple_files(self): - """Test prompt generation with multiple files.""" - pr_data = { - "number": 789, - "title": "Security improvements", - "body": "Fixing various security issues", - "user": "security-team", - "changed_files": 3, - "additions": 70, - "deletions": 110, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [ - { - "filename": "auth.py", - "status": "modified", - "additions": 20, - "deletions": 10 - }, - { - "filename": "config.yaml", - "status": "added", - "additions": 50, - "deletions": 0 - }, - { - "filename": "old_auth.py", - "status": "deleted", - "additions": 0, - "deletions": 100 - } - ] - } - - pr_diff = """ -diff --git a/auth.py b/auth.py -@@ -1,10 +1,20 @@ -+import secrets -+ -diff --git a/config.yaml b/config.yaml -@@ -0,0 +1,50 @@ -+database: -+ password: "hardcoded_password" -""" - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Check all files are mentioned - assert "auth.py" in prompt - assert "config.yaml" in prompt - assert "old_auth.py" in prompt - - # Check file statuses - assert "modified" in prompt.lower() - assert "added" in prompt.lower() - assert "deleted" in prompt.lower() - - def test_get_code_review_prompt_special_characters(self): - """Test prompt generation with special characters.""" - pr_data = { - "number": 999, - "title": "Fix SQL injection in user's profile", - "body": "This fixes a SQL injection vulnerability in the `get_user()` function", - "user": "user-with-dash", - "changed_files": 1, - "additions": 5, - "deletions": 3, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [ - { - "filename": "src/db/queries.py", - "status": "modified", - "additions": 5, - "deletions": 3 - } - ] - } - - pr_diff = """ -diff --git a/src/db/queries.py b/src/db/queries.py -@@ -10,3 +10,5 @@ -- query = f"SELECT * FROM users WHERE id = {user_id}" -+ query = "SELECT * FROM users WHERE id = ?" -+ cursor.execute(query, (user_id,)) -""" - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Check special characters are preserved - assert "user's" in prompt - assert "user-with-dash" in prompt - assert "src/db/queries.py" in prompt - - def test_get_code_review_prompt_no_files(self): - """Test prompt generation with no files (edge case).""" - pr_data = { - "number": 111, - "title": "Documentation update", - "body": "Just updating docs", - "user": "doc-author", - "changed_files": 0, - "additions": 0, - "deletions": 0, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [] # No files - } - - pr_diff = "" # Empty diff - - prompt = get_code_review_prompt(pr_data, pr_diff) - - assert isinstance(prompt, str) - assert "111" in prompt - assert "Documentation update" in prompt - - def test_get_code_review_prompt_structure(self): - """Test that prompt has expected structure.""" - pr_data = { - "number": 42, - "title": "Test PR", - "body": "Test description", - "user": "testuser", - "changed_files": 1, - "additions": 1, - "deletions": 1, - "head": { - "repo": { - "full_name": "owner/repo" - } - }, - "files": [ - { - "filename": "test.py", - "status": "modified", - "additions": 1, - "deletions": 1 - } - ] - } - + assert "Files changed: 0" in prompt + + def test_get_unified_review_prompt_structure(self): + pr_data = _sample_pr_data() + pr_data["title"] = "Test PR" + pr_diff = "diff --git a/test.py b/test.py\n+print('test')" - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Should contain sections for metadata and diff - assert "PR #" in prompt or "Pull Request" in prompt - assert "Title:" in prompt or pr_data["title"] in prompt - assert "Author:" in prompt or pr_data["user"]["login"] in prompt - assert "Files:" in prompt or "test.py" in prompt - - # Should contain the actual diff - assert pr_diff in prompt or "print('test')" in prompt - - def test_get_code_review_prompt_long_diff(self): - """Test prompt generation with very long diff.""" + prompt = get_unified_review_prompt(pr_data, pr_diff) + + assert "CONTEXT:" in prompt + assert "OBJECTIVE:" in prompt + assert "REQUIRED OUTPUT FORMAT:" in prompt + assert pr_diff in prompt + + def test_get_unified_review_prompt_long_diff(self): pr_data = { "number": 12345, "title": "Major refactoring", @@ -301,34 +105,31 @@ def test_get_code_review_prompt_long_diff(self): "filename": f"file{i}.py", "status": "modified", "additions": 100, - "deletions": 50 + "deletions": 50, } for i in range(10) - ] + ], } - - # Create a large diff + pr_diff = "\n".join([ f"diff --git a/file{i}.py b/file{i}.py\n" + "\n".join([f"+line {j}" for j in range(50)]) for i in range(10) ]) - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Should handle large diffs without error + + prompt = get_unified_review_prompt(pr_data, pr_diff) + assert isinstance(prompt, str) - assert len(prompt) > 1000 # Should be substantial + assert len(prompt) > 1000 assert "12345" in prompt assert "Major refactoring" in prompt - - def test_get_code_review_prompt_unicode(self): - """Test prompt generation with unicode characters.""" + + def test_get_unified_review_prompt_unicode(self): pr_data = { "number": 666, - "title": "Add emoji support 🎉", - "body": "This PR adds emoji rendering 🔒 🛡️", - "user": "émoji-user", + "title": "Add emoji support", + "body": "This PR adds emoji rendering", + "user": "emoji-user", "changed_files": 1, "additions": 42, "deletions": 0, @@ -339,25 +140,36 @@ def test_get_code_review_prompt_unicode(self): }, "files": [ { - "filename": "émojis.py", + "filename": "emoji.py", "status": "added", "additions": 42, - "deletions": 0 + "deletions": 0, } - ] + ], } - + pr_diff = """ -diff --git a/émojis.py b/émojis.py -+# 🔒 Security check +diff --git a/emoji.py b/emoji.py ++# Security check +def check_input(text: str) -> bool: -+ return "🚨" not in text ++ return "ALERT" not in text """ - - prompt = get_code_review_prompt(pr_data, pr_diff) - - # Check unicode is preserved - assert "🎉" in prompt # Title emoji - assert "émoji-user" in prompt - assert "émojis.py" in prompt - assert "🚨" in prompt # From diff + + prompt = get_unified_review_prompt(pr_data, pr_diff) + + assert "emoji-user" in prompt + assert "emoji.py" in prompt + assert "ALERT" in prompt + + def test_get_unified_review_prompt_custom_instructions(self): + pr_data = _sample_pr_data() + + prompt = get_unified_review_prompt( + pr_data, + pr_diff="diff --git a/app.py b/app.py", + custom_review_instructions="Check transaction consistency.", + custom_security_instructions="Check GraphQL authz.", + ) + + assert "Check transaction consistency." in prompt + assert "Check GraphQL authz." in prompt diff --git a/docs/custom-security-scan-instructions.md b/docs/custom-security-scan-instructions.md index 2719a7e..d9bb43b 100644 --- a/docs/custom-security-scan-instructions.md +++ b/docs/custom-security-scan-instructions.md @@ -52,7 +52,7 @@ See [examples/custom-security-scan-instructions.txt](../examples/custom-security ## How It Works -Your custom instructions are appended to the security sections of both the general review and the dedicated security review prompts. This means: +Your custom instructions are appended to the security section of the unified review prompt. This means: 1. All default security categories are still checked 2. Your custom categories extend (not replace) the default security scan 3. The same HIGH/MEDIUM/LOW severity guidelines apply