11from __future__ import annotations
22
33import logging
4+ import re
45import subprocess
56from collections import defaultdict
67from pathlib import Path
1011
1112from ..ignore import get_ignore_specs , get_whitelist_spec , is_whitelisted , should_ignore
1213from ..tokens import count_tokens
14+ from ..tree import KNOWN_BINARY_EXTENSIONS
1315from .config import LIMITS
1416from .config .extensions import CODE_EXTENSIONS , CONFIG_EXTENSIONS , DOC_EXTENSIONS
1517from .edges import discover_all_related_files
1618from .fragments import enclosing_fragment , fragment_file # type: ignore[attr-defined]
1719from .git import (
1820 GitError ,
1921 get_changed_files ,
22+ get_deleted_files ,
2023 get_diff_text ,
24+ get_renamed_old_paths ,
2125 get_untracked_files ,
2226 is_git_repo ,
2327 parse_diff ,
@@ -64,11 +68,21 @@ def _kind_priority(kind: str) -> int:
6468 return 0 if kind in _SEMANTIC_KINDS else 1
6569
6670
71+ _BINARY_CTRL_RE = re .compile (r"[\x00-\x08\x0e-\x1f]" )
72+
73+
74+ def _looks_binary (content : str ) -> bool :
75+ return bool (_BINARY_CTRL_RE .search (content [:8192 ]))
76+
77+
6778def _read_file_content (
6879 file_path : Path ,
6980 root_dir : Path ,
7081 preferred_revs : list [str ],
7182) -> str | None :
83+ if file_path .suffix .lower () in KNOWN_BINARY_EXTENSIONS :
84+ return None
85+
7286 abs_path = _normalize_path (file_path , root_dir )
7387 try :
7488 rel = abs_path .relative_to (root_dir .resolve ())
@@ -78,13 +92,19 @@ def _read_file_content(
7892
7993 for rev in preferred_revs :
8094 try :
81- return show_file_at_revision (root_dir , rev , rel )
95+ content = show_file_at_revision (root_dir , rev , rel )
96+ if _looks_binary (content ):
97+ return None
98+ return content
8299 except GitError :
83100 continue
84101
85102 if abs_path .exists () and abs_path .is_file ():
86103 try :
87- return abs_path .read_text (encoding = "utf-8" )
104+ content = abs_path .read_text (encoding = "utf-8" )
105+ if _looks_binary (content ):
106+ return None
107+ return content
88108 except (OSError , UnicodeDecodeError ):
89109 pass
90110
@@ -104,6 +124,7 @@ def _build_preferred_revs(base_rev: str | None, head_rev: str | None) -> list[st
104124
105125
106126_MAX_GENERATED_FRAGMENTS = LIMITS .max_generated_fragments
127+ _MAX_GENERATED_LINES = LIMITS .max_generated_lines
107128
108129
109130_GENERATED_FILENAME_PATTERNS = frozenset (
@@ -117,6 +138,7 @@ def _build_preferred_revs(base_rev: str | None, head_rev: str | None) -> list[st
117138 ".min.js" ,
118139 ".min.css" ,
119140 ".designer.cs" ,
141+ ".api" ,
120142 }
121143)
122144
@@ -195,6 +217,26 @@ def _process_files_for_fragments(
195217 " (generated)" if is_generated else "" ,
196218 )
197219
220+ if is_generated :
221+ truncated : list [Fragment ] = []
222+ for frag in file_frags :
223+ if frag .line_count > _MAX_GENERATED_LINES :
224+ lines = frag .content .splitlines ()
225+ remaining = len (lines ) - _MAX_GENERATED_LINES
226+ lines = lines [:_MAX_GENERATED_LINES ]
227+ truncated_content = "\n " .join (lines ) + f"\n # ... [{ remaining } more lines]"
228+ truncated .append (
229+ Fragment (
230+ id = FragmentId (frag .path , frag .start_line , frag .start_line + len (lines ) - 1 ),
231+ kind = frag .kind ,
232+ content = truncated_content ,
233+ identifiers = extract_identifiers (truncated_content ),
234+ )
235+ )
236+ else :
237+ truncated .append (frag )
238+ file_frags = truncated
239+
198240 for frag in file_frags :
199241 fragments .append (frag )
200242 seen_frag_ids .add (frag .id )
@@ -521,6 +563,10 @@ def build_diff_context(
521563 changed_files = _filter_ignored (changed_files , root_dir , combined_spec )
522564 changed_files = _filter_whitelist (changed_files , root_dir , wl_spec )
523565
566+ excluded_paths = get_deleted_files (root_dir , diff_range ) | get_renamed_old_paths (root_dir , diff_range )
567+ if excluded_paths :
568+ changed_files = [f for f in changed_files if f .resolve () not in excluded_paths ]
569+
524570 preferred_revs = _build_preferred_revs (base_rev , head_rev )
525571
526572 seen_frag_ids : set [FragmentId ] = set ()
0 commit comments