From 01fde0aeb900f70aae4e2ea8959b2ed2778f6b74 Mon Sep 17 00:00:00 2001
From: Neon <neon@neosynth.net>
Date: Thu, 5 Feb 2026 14:34:42 -0800
Subject: [PATCH] Use agentic file reading for large diffs instead of skipping

  Instead of skipping reviews when the diff exceeds max-diff-lines,
  switch to agentic mode where Claude reads files on demand. This
  ensures large PRs still get reviewed while managing context limits.

  Changes:
  - Large diffs trigger agentic mode instead of skipping review
  - max-diff-lines=0 now forces agentic mode (always read files)
  - Updated prompts to give Claude clear file reading instructions
  - Preserved existing fallback for PROMPT_TOO_LONG errors

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 action.yml                             |  2 +-
 claudecode/github_action_audit.py      | 38 ++++++++------------------
 claudecode/prompts.py                  | 33 ++++++++++++++++++----
 claudecode/test_github_action_audit.py |  8 +++---
 4 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/action.yml b/action.yml
index 3286f38..1961dfa 100644
--- a/action.yml
+++ b/action.yml
@@ -69,7 +69,7 @@ inputs:
     default: ''
 
   max-diff-lines:
-    description: 'Maximum number of diff lines to review. PRs exceeding this limit will be skipped with a warning. Set to 0 to disable limit. Default 5000 keeps context under 50% of token limit.'
+    description: 'Maximum diff lines to embed in prompt. Larger diffs use agentic file reading instead. Set to 0 to always use agentic mode. Default: 5000'
     required: false
     default: '5000'
 
diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py
index 132f385..2299df0 100644
--- a/claudecode/github_action_audit.py
+++ b/claudecode/github_action_audit.py
@@ -680,34 +680,20 @@ def main():
             print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'}))
             sys.exit(EXIT_GENERAL_ERROR)
 
-        # Check diff size limit
+        # Determine whether to embed diff or use agentic file reading
         max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
         try:
             max_diff_lines = int(max_diff_lines_str)
         except ValueError:
             max_diff_lines = 5000
 
-        if max_diff_lines > 0:
-            diff_line_count = len(pr_diff.splitlines())
-            if diff_line_count > max_diff_lines:
-                print(f"[Warning] PR diff has {diff_line_count} lines, exceeding limit of {max_diff_lines}. Skipping review.", file=sys.stderr)
-                output = {
-                    'pr_number': pr_number,
-                    'repo': repo_name,
-                    'findings': [],
-                    'analysis_summary': {
-                        'files_reviewed': 0,
-                        'high_severity': 0,
-                        'medium_severity': 0,
-                        'low_severity': 0,
-                        'review_completed': False,
-                        'skipped_reason': f'Diff too large: {diff_line_count} lines exceeds limit of {max_diff_lines}'
-                    }
-                }
-                print(json.dumps(output, indent=2))
-                sys.exit(EXIT_SUCCESS)
-            else:
-                print(f"[Debug] PR diff has {diff_line_count} lines (limit: {max_diff_lines})", file=sys.stderr)
+        diff_line_count = len(pr_diff.splitlines())
+        use_agentic_mode = max_diff_lines == 0 or diff_line_count > max_diff_lines
+
+        if use_agentic_mode:
+            print(f"[Info] Using agentic file reading mode (diff has {diff_line_count} lines, threshold: {max_diff_lines})", file=sys.stderr)
+        else:
+            print(f"[Debug] Embedding diff in prompt ({diff_line_count} lines)", file=sys.stderr)
 
         # Get repo directory from environment or use current directory
         repo_path = os.environ.get('REPO_PATH')
@@ -716,7 +702,7 @@ def main():
         def run_review(include_diff: bool):
             prompt_text = get_unified_review_prompt(
                 pr_data,
-                pr_diff,
+                pr_diff if include_diff else None,
                 include_diff=include_diff,
                 custom_review_instructions=custom_review_instructions,
                 custom_security_instructions=custom_security_instructions,
@@ -727,12 +713,12 @@ def run_review(include_diff: bool):
         analysis_summary_from_review = {}
 
         try:
-            (success, error_msg, review_results), prompt_len = run_review(include_diff=True)
+            (success, error_msg, review_results), prompt_len = run_review(include_diff=not use_agentic_mode)
 
+            # Fallback to agentic mode if prompt still too long
             if not success and error_msg == "PROMPT_TOO_LONG":
-                print(f"[Info] Review prompt too long, retrying without diff. Original prompt length: {prompt_len} characters", file=sys.stderr)
+                print(f"[Info] Prompt too long ({prompt_len} chars), falling back to agentic mode", file=sys.stderr)
                 (success, error_msg, review_results), prompt_len = run_review(include_diff=False)
-                print(f"[Info] Review prompt length without diff: {prompt_len} characters", file=sys.stderr)
 
             if not success:
                 raise AuditError(f'Code review failed: {error_msg}')
diff --git a/claudecode/prompts.py b/claudecode/prompts.py
index f93a057..7c9cc75 100644
--- a/claudecode/prompts.py
+++ b/claudecode/prompts.py
@@ -37,10 +37,17 @@ def get_unified_review_prompt(
 
 Review the complete diff above. This contains all code changes in the PR.
 """
-    elif pr_diff and not include_diff:
+    else:
         diff_section = """
 
-NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
+IMPORTANT - FILE READING INSTRUCTIONS:
+You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
+Focus on the files that are most likely to contain issues based on the PR context.
+
+To review effectively:
+1. Read each modified file to understand the current code
+2. Look at surrounding code context when needed to understand the changes
+3. Check related files if you need to understand dependencies or usage patterns
 """
 
     custom_review_section = ""
@@ -256,10 +263,17 @@ def get_code_review_prompt(
 
 Review the complete diff above. This contains all code changes in the PR.
 """
-    elif pr_diff and not include_diff:
+    else:
         diff_section = """
 
-NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
+IMPORTANT - FILE READING INSTRUCTIONS:
+You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
+Focus on the files that are most likely to contain issues based on the PR context.
+
+To review effectively:
+1. Read each modified file to understand the current code
+2. Look at surrounding code context when needed to understand the changes
+3. Check related files if you need to understand dependencies or usage patterns
 """
 
     # Add custom instructions if provided
@@ -414,10 +428,17 @@ def get_security_review_prompt(
 
 Review the complete diff above. This contains all code changes in the PR.
 """
-    elif pr_diff and not include_diff:
+    else:
         diff_section = """
 
-NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
+IMPORTANT - FILE READING INSTRUCTIONS:
+You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
+Focus on the files that are most likely to contain issues based on the PR context.
+
+To review effectively:
+1. Read each modified file to understand the current code
+2. Look at surrounding code context when needed to understand the changes
+3. Check related files if you need to understand dependencies or usage patterns
 """
 
     custom_security_section = ""
diff --git a/claudecode/test_github_action_audit.py b/claudecode/test_github_action_audit.py
index f039607..f983d6e 100644
--- a/claudecode/test_github_action_audit.py
+++ b/claudecode/test_github_action_audit.py
@@ -457,8 +457,8 @@ def test_max_diff_lines_env_parsing(self):
 
         assert max_lines == 5000  # Default when not set
 
-    def test_max_diff_lines_zero_disables_limit(self):
-        """Test that setting MAX_DIFF_LINES to 0 disables the limit."""
+    def test_max_diff_lines_zero_forces_agentic_mode(self):
+        """Test that setting MAX_DIFF_LINES to 0 forces agentic file reading mode."""
         import os
         from unittest.mock import patch
 
@@ -466,6 +466,6 @@ def test_max_diff_lines_zero_disables_limit(self):
             max_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
             max_lines = int(max_lines_str)
 
-            # When max_lines is 0, the check should be skipped
+            # When max_lines is 0, agentic mode is always used
             assert max_lines == 0
-            # In the actual code: if max_diff_lines > 0: ... (check is skipped)
+            # In the actual code: use_agentic_mode = max_diff_lines == 0 or ...