From 01fde0aeb900f70aae4e2ea8959b2ed2778f6b74 Mon Sep 17 00:00:00 2001 From: Neon Date: Thu, 5 Feb 2026 14:34:42 -0800 Subject: [PATCH] Use agentic file reading for large diffs instead of skipping Instead of skipping reviews when the diff exceeds max-diff-lines, switch to agentic mode where Claude reads files on-demand. This ensures large PRs still get reviewed while managing context limits. Changes: - Large diffs trigger agentic mode instead of skipping review - max-diff-lines=0 now forces agentic mode (always read files) - Updated prompts to give Claude clear file reading instructions - Preserved existing fallback for PROMPT_TOO_LONG errors Co-Authored-By: Claude Opus 4.5 " --- action.yml | 2 +- claudecode/github_action_audit.py | 38 ++++++++------------------ claudecode/prompts.py | 33 ++++++++++++++++++---- claudecode/test_github_action_audit.py | 8 +++--- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/action.yml b/action.yml index 3286f38..1961dfa 100644 --- a/action.yml +++ b/action.yml @@ -69,7 +69,7 @@ inputs: default: '' max-diff-lines: - description: 'Maximum number of diff lines to review. PRs exceeding this limit will be skipped with a warning. Set to 0 to disable limit. Default 5000 keeps context under 50% of token limit.' + description: 'Maximum diff lines to embed in prompt. Larger diffs use agentic file reading instead. Set to 0 to always use agentic mode. Default: 5000' required: false default: '5000' diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py index 132f385..2299df0 100644 --- a/claudecode/github_action_audit.py +++ b/claudecode/github_action_audit.py @@ -680,34 +680,20 @@ def main(): print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'})) sys.exit(EXIT_GENERAL_ERROR) - # Check diff size limit + # Determine whether to embed diff or use agentic file reading max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000') try: max_diff_lines = int(max_diff_lines_str) except ValueError: max_diff_lines = 5000 - if max_diff_lines > 0: - diff_line_count = len(pr_diff.splitlines()) - if diff_line_count > max_diff_lines: - print(f"[Warning] PR diff has {diff_line_count} lines, exceeding limit of {max_diff_lines}. Skipping review.", file=sys.stderr) - output = { - 'pr_number': pr_number, - 'repo': repo_name, - 'findings': [], - 'analysis_summary': { - 'files_reviewed': 0, - 'high_severity': 0, - 'medium_severity': 0, - 'low_severity': 0, - 'review_completed': False, - 'skipped_reason': f'Diff too large: {diff_line_count} lines exceeds limit of {max_diff_lines}' - } - } - print(json.dumps(output, indent=2)) - sys.exit(EXIT_SUCCESS) - else: - print(f"[Debug] PR diff has {diff_line_count} lines (limit: {max_diff_lines})", file=sys.stderr) + diff_line_count = len(pr_diff.splitlines()) + use_agentic_mode = max_diff_lines == 0 or diff_line_count > max_diff_lines + + if use_agentic_mode: + print(f"[Info] Using agentic file reading mode (diff has {diff_line_count} lines, threshold: {max_diff_lines})", file=sys.stderr) + else: + print(f"[Debug] Embedding diff in prompt ({diff_line_count} lines)", file=sys.stderr) # Get repo directory from environment or use current directory repo_path = os.environ.get('REPO_PATH') @@ -716,7 +702,7 @@ def main(): def run_review(include_diff: bool): prompt_text = get_unified_review_prompt( pr_data, - pr_diff, + pr_diff if include_diff else None, include_diff=include_diff, custom_review_instructions=custom_review_instructions, custom_security_instructions=custom_security_instructions, @@ -727,12 +713,12 @@ def run_review(include_diff: bool): analysis_summary_from_review = {} try: - (success, error_msg, review_results), prompt_len = run_review(include_diff=True) + (success, error_msg, review_results), prompt_len = run_review(include_diff=not use_agentic_mode) + # Fallback to agentic mode if prompt still too long if not success and error_msg == "PROMPT_TOO_LONG": - print(f"[Info] Review prompt too long, retrying without diff. Original prompt length: {prompt_len} characters", file=sys.stderr) + print(f"[Info] Prompt too long ({prompt_len} chars), falling back to agentic mode", file=sys.stderr) (success, error_msg, review_results), prompt_len = run_review(include_diff=False) - print(f"[Info] Review prompt length without diff: {prompt_len} characters", file=sys.stderr) if not success: raise AuditError(f'Code review failed: {error_msg}') diff --git a/claudecode/prompts.py b/claudecode/prompts.py index f93a057..7c9cc75 100644 --- a/claudecode/prompts.py +++ b/claudecode/prompts.py @@ -37,10 +37,17 @@ def get_unified_review_prompt( Review the complete diff above. This contains all code changes in the PR. """ - elif pr_diff and not include_diff: + else: diff_section = """ -NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR. +IMPORTANT - FILE READING INSTRUCTIONS: +You have access to the repository files. For each file listed above, use the Read tool to examine the changes. +Focus on the files that are most likely to contain issues based on the PR context. + +To review effectively: +1. Read each modified file to understand the current code +2. Look at surrounding code context when needed to understand the changes +3. Check related files if you need to understand dependencies or usage patterns """ custom_review_section = "" @@ -256,10 +263,17 @@ def get_code_review_prompt( Review the complete diff above. This contains all code changes in the PR. """ - elif pr_diff and not include_diff: + else: diff_section = """ -NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR. +IMPORTANT - FILE READING INSTRUCTIONS: +You have access to the repository files. For each file listed above, use the Read tool to examine the changes. +Focus on the files that are most likely to contain issues based on the PR context. + +To review effectively: +1. Read each modified file to understand the current code +2. Look at surrounding code context when needed to understand the changes +3. Check related files if you need to understand dependencies or usage patterns """ # Add custom instructions if provided @@ -414,10 +428,17 @@ def get_security_review_prompt( Review the complete diff above. This contains all code changes in the PR. """ - elif pr_diff and not include_diff: + else: diff_section = """ -NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR. +IMPORTANT - FILE READING INSTRUCTIONS: +You have access to the repository files. For each file listed above, use the Read tool to examine the changes. +Focus on the files that are most likely to contain issues based on the PR context. + +To review effectively: +1. Read each modified file to understand the current code +2. Look at surrounding code context when needed to understand the changes +3. Check related files if you need to understand dependencies or usage patterns """ custom_security_section = "" diff --git a/claudecode/test_github_action_audit.py b/claudecode/test_github_action_audit.py index f039607..f983d6e 100644 --- a/claudecode/test_github_action_audit.py +++ b/claudecode/test_github_action_audit.py @@ -457,8 +457,8 @@ def test_max_diff_lines_env_parsing(self): assert max_lines == 5000 # Default when not set - def test_max_diff_lines_zero_disables_limit(self): - """Test that setting MAX_DIFF_LINES to 0 disables the limit.""" + def test_max_diff_lines_zero_forces_agentic_mode(self): + """Test that setting MAX_DIFF_LINES to 0 forces agentic file reading mode.""" import os from unittest.mock import patch @@ -466,6 +466,6 @@ def test_max_diff_lines_zero_disables_limit(self): max_lines_str = os.environ.get('MAX_DIFF_LINES', '5000') max_lines = int(max_lines_str) - # When max_lines is 0, the check should be skipped + # When max_lines is 0, agentic mode is always used assert max_lines == 0 - # In the actual code: if max_diff_lines > 0: ... (check is skipped) + # In the actual code: use_agentic_mode = max_diff_lines == 0 or ...