Skip to content

Commit a10c93e

Browse files
authored
Merge pull request #46 from zhalice2011/fix/node-modules-not-excluded
fix: exclude node_modules from dependency analysis and add progress logging
2 parents 40c53e3 + 8ac9647 commit a10c93e

File tree

2 files changed

+91
-34
lines changed

2 files changed

+91
-34
lines changed

codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py

Lines changed: 85 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,44 @@
99
from typing import Dict, List
1010
import logging
1111
import traceback
12+
import time
13+
import signal
1214
from pathlib import Path
15+
from contextlib import contextmanager
1316
from codewiki.src.be.dependency_analyzer.models.core import Node, CallRelationship
1417
from codewiki.src.be.dependency_analyzer.utils.patterns import CODE_EXTENSIONS
1518
from codewiki.src.be.dependency_analyzer.utils.security import safe_open_text
1619

1720
logger = logging.getLogger(__name__)
1821

1922

23+
class TimeoutError(Exception):
    """Signal that parsing a single file ran past its time budget.

    NOTE: inside this module the name shadows Python's builtin
    ``TimeoutError``; this class derives directly from ``Exception`` and is
    therefore not an ``OSError``.
    """
26+
27+
28+
@contextmanager
def timeout(seconds):
    """Context manager that aborts the wrapped block after ``seconds``.

    A SIGALRM handler is installed and ``signal.alarm(seconds)`` is armed for
    the duration of the ``with`` body. If the alarm fires, the handler raises
    ``TimeoutError`` inside the body. On exit the alarm is cancelled and the
    previously installed SIGALRM handler is restored.

    When the handler cannot be installed the body runs with no time limit:
    ``signal.SIGALRM`` does not exist on Windows (AttributeError) and
    ``signal.signal()`` refuses to run outside the main thread (ValueError).

    Args:
        seconds: Timeout passed straight to ``signal.alarm``.

    Raises:
        TimeoutError: From the SIGALRM handler when the alarm fires.
    """
    def signal_handler(signum, frame):
        raise TimeoutError(f"File parsing exceeded {seconds}s timeout")

    # Keep handler installation in its own try block so that an exception
    # raised by the *body* can never be mistaken for a setup failure (which
    # would make the generator yield a second time and break contextmanager).
    try:
        old_handler = signal.signal(signal.SIGALRM, signal_handler)
    except (AttributeError, ValueError):
        # No SIGALRM support here (Windows / non-main thread): skip timeout.
        yield
        return

    try:
        signal.alarm(seconds)
        yield
    finally:
        # Always disarm the alarm and put the previous handler back, even if
        # the body (or signal.alarm itself) raised.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)
48+
49+
2050
class CallGraphAnalyzer:
2151
def __init__(self):
2252
"""Initialize the call graph analyzer."""
@@ -35,17 +65,35 @@ def analyze_code_files(self, code_files: List[Dict], base_dir: str) -> Dict:
3565
4. Returns all nodes and relationships
3666
"""
3767
logger.debug(f"Starting analysis of {len(code_files)} files")
68+
logger.info(f"📊 Parsing {len(code_files)} source files (this may take a few minutes)...")
3869

3970
self.functions = {}
4071
self.call_relationships = []
4172

4273
files_analyzed = 0
43-
for file_info in code_files:
44-
logger.debug(f"Analyzing: {file_info['path']}")
45-
self._analyze_code_file(base_dir, file_info)
46-
files_analyzed += 1
47-
logger.debug(
48-
f"Analysis complete: {files_analyzed} files analyzed, {len(self.functions)} functions, {len(self.call_relationships)} relationships"
74+
files_failed = 0
75+
start_time = time.time()
76+
77+
for idx, file_info in enumerate(code_files, 1):
78+
file_path = file_info['path']
79+
try:
80+
# Log progress every file with elapsed time
81+
if idx % max(1, len(code_files) // 10) == 0 or idx <= 5:
82+
elapsed = time.time() - start_time
83+
rate = idx / elapsed if elapsed > 0 else 0
84+
remaining = (len(code_files) - idx) / rate if rate > 0 else 0
85+
logger.info(f" [{idx}/{len(code_files)}] {file_path} ({elapsed:.1f}s elapsed, ~{remaining:.1f}s remaining)")
86+
87+
self._analyze_code_file(base_dir, file_info)
88+
files_analyzed += 1
89+
except Exception as e:
90+
files_failed += 1
91+
logger.warning(f" ⚠️ [{idx}/{len(code_files)}] Failed to analyze {file_path}: {str(e)[:100]}")
92+
93+
elapsed_time = time.time() - start_time
94+
logger.info(
95+
f"✓ Analysis complete: {files_analyzed}/{len(code_files)} files analyzed, "
96+
f"{files_failed} failed, {len(self.functions)} functions, {len(self.call_relationships)} relationships ({elapsed_time:.1f}s)"
4997
)
5098

5199
logger.debug("Resolving call relationships")
@@ -116,34 +164,38 @@ def _analyze_code_file(self, repo_dir: str, file_info: Dict):
116164
file_path = base / file_info["path"]
117165

118166
try:
119-
content = safe_open_text(base, file_path)
120-
language = file_info["language"]
121-
if language == "python":
122-
self._analyze_python_file(file_path, content, repo_dir)
123-
elif language == "javascript":
124-
self._analyze_javascript_file(file_path, content, repo_dir)
125-
elif language == "typescript":
126-
self._analyze_typescript_file(file_path, content, repo_dir)
127-
elif language == "java":
128-
self._analyze_java_file(file_path, content, repo_dir)
129-
elif language == "kotlin":
130-
self._analyze_kotlin_file(file_path, content, repo_dir)
131-
elif language == "csharp":
132-
self._analyze_csharp_file(file_path, content, repo_dir)
133-
elif language == "c":
134-
self._analyze_c_file(file_path, content, repo_dir)
135-
elif language == "cpp":
136-
self._analyze_cpp_file(file_path, content, repo_dir)
137-
elif language == "php":
138-
self._analyze_php_file(file_path, content, repo_dir)
139-
# else:
140-
# logger.warning(
141-
# f"Unsupported language for call graph analysis: {language} for file {file_path}"
142-
# )
143-
167+
# Add timeout protection (30 seconds per file max)
168+
with timeout(30):
169+
content = safe_open_text(base, file_path)
170+
language = file_info["language"]
171+
if language == "python":
172+
self._analyze_python_file(file_path, content, repo_dir)
173+
elif language == "javascript":
174+
self._analyze_javascript_file(file_path, content, repo_dir)
175+
elif language == "typescript":
176+
self._analyze_typescript_file(file_path, content, repo_dir)
177+
elif language == "java":
178+
self._analyze_java_file(file_path, content, repo_dir)
179+
elif language == "kotlin":
180+
self._analyze_kotlin_file(file_path, content, repo_dir)
181+
elif language == "csharp":
182+
self._analyze_csharp_file(file_path, content, repo_dir)
183+
elif language == "c":
184+
self._analyze_c_file(file_path, content, repo_dir)
185+
elif language == "cpp":
186+
self._analyze_cpp_file(file_path, content, repo_dir)
187+
elif language == "php":
188+
self._analyze_php_file(file_path, content, repo_dir)
189+
# else:
190+
# logger.warning(
191+
# f"Unsupported language for call graph analysis: {language} for file {file_path}"
192+
# )
193+
194+
except TimeoutError as e:
195+
logger.warning(f"⏱️ Timeout analyzing {file_path}: {str(e)}")
144196
except Exception as e:
145-
logger.error(f"⚠️ Error analyzing {file_path}: {str(e)}")
146-
logger.error(f"Traceback: {traceback.format_exc()}")
197+
logger.debug(f"Error analyzing {file_path}: {str(e)}")
198+
logger.debug(f"Traceback: {traceback.format_exc()}")
147199

148200
def _analyze_python_file(self, file_path: str, content: str, base_dir: str):
149201
"""

codewiki/src/be/dependency_analyzer/utils/patterns.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,17 @@
2929
".hypothesis",
3030
"poetry.lock",
3131
"Pipfile.lock",
32-
# JavaScript/FileSystemNode
32+
# JavaScript/Node.js (CRITICAL: node_modules must be excluded)
33+
"node_modules/",
34+
"node_modules",
3335
"package-lock.json",
3436
"yarn.lock",
3537
".npm",
3638
".yarn",
3739
".pnpm-store",
40+
".next/",
41+
".nuxt/",
42+
".turbo/",
3843
"bun.lock",
3944
"bun.lockb",
4045
# Java

0 commit comments

Comments
 (0)