Skip to content

Commit 9c0d47c

Browse files
authored
Use agentic file reading for large diffs instead of skipping (#19)
Instead of skipping reviews when the diff exceeds max-diff-lines, switch to agentic mode where Claude reads files on demand. This ensures large PRs still get reviewed while managing context limits.

Changes:
- Large diffs trigger agentic mode instead of skipping review
- `max-diff-lines=0` now forces agentic mode (always read files)
- Updated prompts to give Claude clear file reading instructions
- Preserved existing fallback for `PROMPT_TOO_LONG` errors

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 7091abd commit 9c0d47c

4 files changed

Lines changed: 44 additions & 37 deletions

File tree

action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ inputs:
6969
default: ''
7070

7171
max-diff-lines:
72-
description: 'Maximum number of diff lines to review. PRs exceeding this limit will be skipped with a warning. Set to 0 to disable limit. Default 5000 keeps context under 50% of token limit.'
72+
description: 'Maximum diff lines to embed in prompt. Larger diffs use agentic file reading instead. Set to 0 to always use agentic mode. Default: 5000'
7373
required: false
7474
default: '5000'
7575

claudecode/github_action_audit.py

Lines changed: 12 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -680,34 +680,20 @@ def main():
680680
print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'}))
681681
sys.exit(EXIT_GENERAL_ERROR)
682682

683-
# Check diff size limit
683+
# Determine whether to embed diff or use agentic file reading
684684
max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
685685
try:
686686
max_diff_lines = int(max_diff_lines_str)
687687
except ValueError:
688688
max_diff_lines = 5000
689689

690-
if max_diff_lines > 0:
691-
diff_line_count = len(pr_diff.splitlines())
692-
if diff_line_count > max_diff_lines:
693-
print(f"[Warning] PR diff has {diff_line_count} lines, exceeding limit of {max_diff_lines}. Skipping review.", file=sys.stderr)
694-
output = {
695-
'pr_number': pr_number,
696-
'repo': repo_name,
697-
'findings': [],
698-
'analysis_summary': {
699-
'files_reviewed': 0,
700-
'high_severity': 0,
701-
'medium_severity': 0,
702-
'low_severity': 0,
703-
'review_completed': False,
704-
'skipped_reason': f'Diff too large: {diff_line_count} lines exceeds limit of {max_diff_lines}'
705-
}
706-
}
707-
print(json.dumps(output, indent=2))
708-
sys.exit(EXIT_SUCCESS)
709-
else:
710-
print(f"[Debug] PR diff has {diff_line_count} lines (limit: {max_diff_lines})", file=sys.stderr)
690+
diff_line_count = len(pr_diff.splitlines())
691+
use_agentic_mode = max_diff_lines == 0 or diff_line_count > max_diff_lines
692+
693+
if use_agentic_mode:
694+
print(f"[Info] Using agentic file reading mode (diff has {diff_line_count} lines, threshold: {max_diff_lines})", file=sys.stderr)
695+
else:
696+
print(f"[Debug] Embedding diff in prompt ({diff_line_count} lines)", file=sys.stderr)
711697

712698
# Get repo directory from environment or use current directory
713699
repo_path = os.environ.get('REPO_PATH')
@@ -716,7 +702,7 @@ def main():
716702
def run_review(include_diff: bool):
717703
prompt_text = get_unified_review_prompt(
718704
pr_data,
719-
pr_diff,
705+
pr_diff if include_diff else None,
720706
include_diff=include_diff,
721707
custom_review_instructions=custom_review_instructions,
722708
custom_security_instructions=custom_security_instructions,
@@ -727,12 +713,12 @@ def run_review(include_diff: bool):
727713
analysis_summary_from_review = {}
728714

729715
try:
730-
(success, error_msg, review_results), prompt_len = run_review(include_diff=True)
716+
(success, error_msg, review_results), prompt_len = run_review(include_diff=not use_agentic_mode)
731717

718+
# Fallback to agentic mode if prompt still too long
732719
if not success and error_msg == "PROMPT_TOO_LONG":
733-
print(f"[Info] Review prompt too long, retrying without diff. Original prompt length: {prompt_len} characters", file=sys.stderr)
720+
print(f"[Info] Prompt too long ({prompt_len} chars), falling back to agentic mode", file=sys.stderr)
734721
(success, error_msg, review_results), prompt_len = run_review(include_diff=False)
735-
print(f"[Info] Review prompt length without diff: {prompt_len} characters", file=sys.stderr)
736722

737723
if not success:
738724
raise AuditError(f'Code review failed: {error_msg}')

claudecode/prompts.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,17 @@ def get_unified_review_prompt(
3737
3838
Review the complete diff above. This contains all code changes in the PR.
3939
"""
40-
elif pr_diff and not include_diff:
40+
else:
4141
diff_section = """
4242
43-
NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
43+
IMPORTANT - FILE READING INSTRUCTIONS:
44+
You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
45+
Focus on the files that are most likely to contain issues based on the PR context.
46+
47+
To review effectively:
48+
1. Read each modified file to understand the current code
49+
2. Look at surrounding code context when needed to understand the changes
50+
3. Check related files if you need to understand dependencies or usage patterns
4451
"""
4552

4653
custom_review_section = ""
@@ -256,10 +263,17 @@ def get_code_review_prompt(
256263
257264
Review the complete diff above. This contains all code changes in the PR.
258265
"""
259-
elif pr_diff and not include_diff:
266+
else:
260267
diff_section = """
261268
262-
NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
269+
IMPORTANT - FILE READING INSTRUCTIONS:
270+
You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
271+
Focus on the files that are most likely to contain issues based on the PR context.
272+
273+
To review effectively:
274+
1. Read each modified file to understand the current code
275+
2. Look at surrounding code context when needed to understand the changes
276+
3. Check related files if you need to understand dependencies or usage patterns
263277
"""
264278

265279
# Add custom instructions if provided
@@ -414,10 +428,17 @@ def get_security_review_prompt(
414428
415429
Review the complete diff above. This contains all code changes in the PR.
416430
"""
417-
elif pr_diff and not include_diff:
431+
else:
418432
diff_section = """
419433
420-
NOTE: PR diff was omitted due to size constraints. Please use the file exploration tools to examine the specific files that were changed in this PR.
434+
IMPORTANT - FILE READING INSTRUCTIONS:
435+
You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
436+
Focus on the files that are most likely to contain issues based on the PR context.
437+
438+
To review effectively:
439+
1. Read each modified file to understand the current code
440+
2. Look at surrounding code context when needed to understand the changes
441+
3. Check related files if you need to understand dependencies or usage patterns
421442
"""
422443

423444
custom_security_section = ""

claudecode/test_github_action_audit.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -457,15 +457,15 @@ def test_max_diff_lines_env_parsing(self):
457457

458458
assert max_lines == 5000 # Default when not set
459459

460-
def test_max_diff_lines_zero_disables_limit(self):
461-
"""Test that setting MAX_DIFF_LINES to 0 disables the limit."""
460+
def test_max_diff_lines_zero_forces_agentic_mode(self):
461+
"""Test that setting MAX_DIFF_LINES to 0 forces agentic file reading mode."""
462462
import os
463463
from unittest.mock import patch
464464

465465
with patch.dict('os.environ', {'MAX_DIFF_LINES': '0'}):
466466
max_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
467467
max_lines = int(max_lines_str)
468468

469-
# When max_lines is 0, the check should be skipped
469+
# When max_lines is 0, agentic mode is always used
470470
assert max_lines == 0
471-
# In the actual code: if max_diff_lines > 0: ... (check is skipped)
471+
# In the actual code: use_agentic_mode = max_diff_lines == 0 or ...

0 commit comments

Comments
 (0)