99from typing import Dict , List
1010import logging
1111import traceback
12+ import time
13+ import signal
1214from pathlib import Path
15+ from contextlib import contextmanager
1316from codewiki .src .be .dependency_analyzer .models .core import Node , CallRelationship
1417from codewiki .src .be .dependency_analyzer .utils .patterns import CODE_EXTENSIONS
1518from codewiki .src .be .dependency_analyzer .utils .security import safe_open_text
1619
1720logger = logging .getLogger (__name__ )
1821
1922
class TimeoutError(Exception):
    """Signal that parsing a single file ran longer than its time budget.

    NOTE(review): this class deliberately keeps the name ``TimeoutError`` and
    therefore shadows the built-in exception of the same name inside this
    module. It derives directly from ``Exception``, not from ``OSError``.
    """
26+
27+
@contextmanager
def timeout(seconds):
    """Abort the enclosed block with ``TimeoutError`` after ``seconds`` seconds.

    The limit is enforced with ``SIGALRM``. When the alarm handler cannot be
    installed, the block simply runs without a time limit instead of failing.
    That happens when the platform has no ``SIGALRM`` (e.g. Windows) or when
    the caller is not on the main thread.

    Args:
        seconds: Whole number of seconds the block may run; passed straight
            to ``signal.alarm``.

    Raises:
        TimeoutError: Raised inside the ``with`` body when the alarm fires.
    """
    def signal_handler(signum, frame):
        raise TimeoutError(f"File parsing exceeded {seconds}s timeout")

    # Install the handler *outside* the guarded region so that exceptions
    # coming from the caller's body can never be mistaken for
    # "signals are unsupported here".
    try:
        old_handler = signal.signal(signal.SIGALRM, signal_handler)
    except (AttributeError, ValueError):
        # AttributeError: no SIGALRM on this platform (Windows).
        # ValueError: signal.signal() called outside the main thread.
        yield
        return

    try:
        signal.alarm(seconds)
        yield
    finally:
        # Always cancel the pending alarm and put the previous handler back,
        # whether the body finished, raised, or timed out.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
48+
49+
2050class CallGraphAnalyzer :
2151 def __init__ (self ):
2252 """Initialize the call graph analyzer."""
@@ -35,17 +65,35 @@ def analyze_code_files(self, code_files: List[Dict], base_dir: str) -> Dict:
3565 4. Returns all nodes and relationships
3666 """
3767 logger .debug (f"Starting analysis of { len (code_files )} files" )
68+ logger .info (f"📊 Parsing { len (code_files )} source files (this may take a few minutes)..." )
3869
3970 self .functions = {}
4071 self .call_relationships = []
4172
4273 files_analyzed = 0
43- for file_info in code_files :
44- logger .debug (f"Analyzing: { file_info ['path' ]} " )
45- self ._analyze_code_file (base_dir , file_info )
46- files_analyzed += 1
47- logger .debug (
48- f"Analysis complete: { files_analyzed } files analyzed, { len (self .functions )} functions, { len (self .call_relationships )} relationships"
74+ files_failed = 0
75+ start_time = time .time ()
76+
77+ for idx , file_info in enumerate (code_files , 1 ):
78+ file_path = file_info ['path' ]
79+ try :
80+ # Log progress every file with elapsed time
81+ if idx % max (1 , len (code_files ) // 10 ) == 0 or idx <= 5 :
82+ elapsed = time .time () - start_time
83+ rate = idx / elapsed if elapsed > 0 else 0
84+ remaining = (len (code_files ) - idx ) / rate if rate > 0 else 0
85+ logger .info (f" [{ idx } /{ len (code_files )} ] { file_path } ({ elapsed :.1f} s elapsed, ~{ remaining :.1f} s remaining)" )
86+
87+ self ._analyze_code_file (base_dir , file_info )
88+ files_analyzed += 1
89+ except Exception as e :
90+ files_failed += 1
91+ logger .warning (f" ⚠️ [{ idx } /{ len (code_files )} ] Failed to analyze { file_path } : { str (e )[:100 ]} " )
92+
93+ elapsed_time = time .time () - start_time
94+ logger .info (
95+ f"✓ Analysis complete: { files_analyzed } /{ len (code_files )} files analyzed, "
96+ f"{ files_failed } failed, { len (self .functions )} functions, { len (self .call_relationships )} relationships ({ elapsed_time :.1f} s)"
4997 )
5098
5199 logger .debug ("Resolving call relationships" )
@@ -116,34 +164,38 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
116164 file_path = base / file_info ["path" ]
117165
118166 try :
119- content = safe_open_text (base , file_path )
120- language = file_info ["language" ]
121- if language == "python" :
122- self ._analyze_python_file (file_path , content , repo_dir )
123- elif language == "javascript" :
124- self ._analyze_javascript_file (file_path , content , repo_dir )
125- elif language == "typescript" :
126- self ._analyze_typescript_file (file_path , content , repo_dir )
127- elif language == "java" :
128- self ._analyze_java_file (file_path , content , repo_dir )
129- elif language == "kotlin" :
130- self ._analyze_kotlin_file (file_path , content , repo_dir )
131- elif language == "csharp" :
132- self ._analyze_csharp_file (file_path , content , repo_dir )
133- elif language == "c" :
134- self ._analyze_c_file (file_path , content , repo_dir )
135- elif language == "cpp" :
136- self ._analyze_cpp_file (file_path , content , repo_dir )
137- elif language == "php" :
138- self ._analyze_php_file (file_path , content , repo_dir )
139- # else:
140- # logger.warning(
141- # f"Unsupported language for call graph analysis: {language} for file {file_path}"
142- # )
143-
167+ # Add timeout protection (30 seconds per file max)
168+ with timeout (30 ):
169+ content = safe_open_text (base , file_path )
170+ language = file_info ["language" ]
171+ if language == "python" :
172+ self ._analyze_python_file (file_path , content , repo_dir )
173+ elif language == "javascript" :
174+ self ._analyze_javascript_file (file_path , content , repo_dir )
175+ elif language == "typescript" :
176+ self ._analyze_typescript_file (file_path , content , repo_dir )
177+ elif language == "java" :
178+ self ._analyze_java_file (file_path , content , repo_dir )
179+ elif language == "kotlin" :
180+ self ._analyze_kotlin_file (file_path , content , repo_dir )
181+ elif language == "csharp" :
182+ self ._analyze_csharp_file (file_path , content , repo_dir )
183+ elif language == "c" :
184+ self ._analyze_c_file (file_path , content , repo_dir )
185+ elif language == "cpp" :
186+ self ._analyze_cpp_file (file_path , content , repo_dir )
187+ elif language == "php" :
188+ self ._analyze_php_file (file_path , content , repo_dir )
189+ # else:
190+ # logger.warning(
191+ # f"Unsupported language for call graph analysis: {language} for file {file_path}"
192+ # )
193+
194+ except TimeoutError as e :
195+ logger .warning (f"⏱️ Timeout analyzing { file_path } : { str (e )} " )
144196 except Exception as e :
145- logger .error (f"⚠️ Error analyzing { file_path } : { str (e )} " )
146- logger .error (f"Traceback: { traceback .format_exc ()} " )
197+ logger .debug (f"Error analyzing { file_path } : { str (e )} " )
198+ logger .debug (f"Traceback: { traceback .format_exc ()} " )
147199
148200 def _analyze_python_file (self , file_path : str , content : str , base_dir : str ):
149201 """
0 commit comments