Skip to content

Commit 3c115f2

Browse files
InzerdParzivalHack
andauthored
feat: add syntax warning param and refactor error message (issue #28) (#47)
# GOAL: fix issue #28, refactor message error during AST file parsing and add a new param to enable python SyntaxWarning ## Changes - CLI & Wizard Mode: Added a new parameter (flag) to enable/disable SyntaxWarning reporting. This allows users to decide if they want to treat syntax warnings as blocking issues or ignore them during scans. - Refactoring get_python_file_asts: - Improved the logic that captures and reports errors during AST generation. - Standardized error messages to make them more descriptive when a file fails to parse. - Integrated the new enable_syntax_warnings logic within the core file-walking loop. - Created a new test suite test_get_asts.py to verify: - Default behavior (warnings ignored). - Behavior when warnings are enabled (treated as errors/exceptions). - Handling of valid, invalid, and encoding-error files. --------- Co-authored-by: Tommaso Bona <piergeolo@gmail.com>
1 parent 3c95471 commit 3c115f2

2 files changed

Lines changed: 144 additions & 27 deletions

File tree

src/pyspector/cli.py

Lines changed: 70 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -125,40 +125,75 @@ def should_skip_file(file_path: Path) -> bool:
125125
return False
126126

127127

128-
def get_python_file_asts(path: Path) -> List[Dict[str, Any]]:
128+
def get_python_file_asts(path: Path, enable_syntax_warnings: bool = False) -> List[Dict[str, Any]]:
129129
"""Recursively finds Python files and returns their content and AST."""
130130
results = []
131-
files_to_scan = list(path.glob('**/*.py')) if path.is_dir() else [path]
131+
files_to_scan = list(path.glob("**/*.py")) if path.is_dir() else [path]
132132

133-
# Suppress Python's SyntaxWarning during AST parsing
133+
# Suppress or treat Python's SyntaxWarning as errors during AST parsing
134134
with warnings.catch_warnings():
135-
warnings.filterwarnings('ignore', category=SyntaxWarning)
136-
135+
if not enable_syntax_warnings:
136+
warnings.filterwarnings('ignore', category=SyntaxWarning)
137+
else:
138+
warnings.filterwarnings('error', category=SyntaxWarning)
139+
137140
for py_file in files_to_scan:
138141
if py_file.is_file():
139-
# Skip test fixtures
142+
# Pre-compute the relative path to maintain consistent log messages
143+
display_path = py_file.relative_to(path) if path.is_dir() else py_file.name
144+
145+
# Skip test fixtures and notify the user
140146
if should_skip_file(py_file):
147+
click.echo(
148+
click.style(
149+
f"Info: Skipped {display_path} (test file or fixture)",
150+
fg="blue",
151+
)
152+
)
141153
continue
142-
154+
143155
try:
144-
content = py_file.read_text(encoding='utf-8')
156+
content = py_file.read_text(encoding="utf-8")
145157
parsed_ast = ast.parse(content, filename=str(py_file))
146158
ast_json = json.dumps(parsed_ast, cls=AstEncoder)
147-
results.append({
148-
"file_path": str(py_file.relative_to(path)) if path.is_dir() else py_file.name,
149-
"content": content,
150-
"ast_json": ast_json
151-
})
159+
results.append(
160+
{
161+
"file_path": str(display_path),
162+
"content": content,
163+
"ast_json": ast_json,
164+
}
165+
)
166+
except SyntaxWarning as e:
167+
# Log a warning when AST parsing fails due to Python syntax warning
168+
click.echo(
169+
click.style(
170+
f"SyntaxWarning: there is a syntax warning in {display_path} - {e.msg} (line {e.lineno})",
171+
fg="yellow",
172+
)
173+
)
152174
except SyntaxError as e:
153-
# Only warn about syntax errors in non-test files
154-
if not should_skip_file(py_file):
155-
click.echo(click.style(
156-
f"Warning: Could not parse {py_file.relative_to(path) if path.is_dir() else py_file.name}: {e.msg} ({py_file.name}, line {e.lineno})",
157-
fg="yellow"
158-
))
175+
# Log a error when AST parsing fails due to invalid Python syntax
176+
click.echo(
177+
click.style(
178+
f"SyntaxError: Could not parse {display_path} - {e.msg} (line {e.lineno})",
179+
fg="red",
180+
)
181+
)
159182
except UnicodeDecodeError as e:
160-
click.echo(click.style(f"Warning: Could not read {py_file}: {e}", fg="yellow"))
161-
183+
# Log a warning when a file cannot be read as utf-8
184+
click.echo(
185+
click.style(
186+
f"Warning: Could not read {display_path} - Invalid UTF-8 encoding ({e.reason})",
187+
fg="yellow",
188+
)
189+
)
190+
except Exception as e:
191+
click.echo(
192+
click.style(
193+
f"Warning: Could not read {display_path} - {e}", fg="yellow"
194+
)
195+
)
196+
162197
return results
163198

164199

@@ -308,6 +343,8 @@ def run_wizard():
308343

309344
supply_chain = click.confirm("Check dependencies for CVE vulnerabilities?", default=False)
310345

346+
syntax_warnings = click.confirm("Treat Python SyntaxWarnings as errors?", default=False)
347+
311348

312349
output_file = None
313350
if report_format != "console":
@@ -325,6 +362,7 @@ def run_wizard():
325362
"report_format": report_format,
326363
"output_file": output_file,
327364
"supply_chain_scan": supply_chain,
365+
"syntax_warnings": syntax_warnings,
328366
}
329367

330368

@@ -342,6 +380,7 @@ def run_wizard():
342380
@click.option('--plugin-config', 'plugin_config_file', type=click.Path(exists=True, path_type=Path), help="Path to plugin configuration JSON file")
343381
@click.option('--list-plugins', 'list_plugins', is_flag=True, help="List available plugins and exit")
344382
@click.option('--supply-chain', is_flag=True, default=False, help="Scan dependencies for known CVE vulnerabilities.")
383+
@click.option('--syntax-warnings', is_flag=True, default=False, help="Treat SyntaxWarning as errors during parsing.")
345384
@click.option('--wizard', is_flag=True, help="Interactive guided scan for first-time users")
346385
def run_scan_command(
347386
path: Optional[Path],
@@ -355,6 +394,7 @@ def run_scan_command(
355394
plugin_config_file: Optional[Path],
356395
list_plugins: bool,
357396
supply_chain: bool,
397+
syntax_warnings: bool,
358398
wizard: bool
359399
):
360400
"""The main scan command with plugin support."""
@@ -391,7 +431,8 @@ def run_scan_command(
391431
params["ai_scan"],
392432
plugins=(),
393433
plugin_config={},
394-
supply_chain_scan=params["supply_chain_scan"]
434+
supply_chain_scan=params["supply_chain_scan"],
435+
syntax_warnings=params["syntax_warnings"]
395436
)
396437
else:
397438
_execute_scan(
@@ -403,7 +444,8 @@ def run_scan_command(
403444
params["ai_scan"],
404445
plugins=(),
405446
plugin_config={},
406-
supply_chain_scan=params["supply_chain_scan"]
447+
supply_chain_scan=params["supply_chain_scan"],
448+
syntax_warnings=params["syntax_warnings"]
407449
)
408450
return
409451

@@ -468,7 +510,7 @@ def run_scan_command(
468510
)
469511
scan_path = Path(temp_dir)
470512
scan_path = Path(temp_dir)
471-
_execute_scan(scan_path, config_path, output_file, report_format, severity_level, ai_scan, plugins, plugin_config, supply_chain)
513+
_execute_scan(scan_path, config_path, output_file, report_format, severity_level, ai_scan, plugins, plugin_config, supply_chain, syntax_warnings)
472514
except subprocess.CalledProcessError as e:
473515
click.echo(click.style(f"Error: Failed to clone repository.\n{e.stderr}", fg="red"))
474516
sys.exit(1)
@@ -479,7 +521,7 @@ def run_scan_command(
479521
# Handle local path scan
480522
scan_path = path
481523
scan_path = path
482-
_execute_scan(scan_path, config_path, output_file, report_format, severity_level, ai_scan, plugins, plugin_config, supply_chain)
524+
_execute_scan(scan_path, config_path, output_file, report_format, severity_level, ai_scan, plugins, plugin_config, supply_chain, syntax_warnings)
483525
return
484526

485527

@@ -492,7 +534,8 @@ def _execute_scan(
492534
ai_scan: bool,
493535
plugins: tuple,
494536
plugin_config: dict,
495-
supply_chain_scan: bool = False
537+
supply_chain_scan: bool = False,
538+
syntax_warnings: bool = False
496539
):
497540
"""Helper function to run the actual scan and reporting."""
498541
start_time = time.time()
@@ -515,7 +558,7 @@ def _execute_scan(
515558
click.echo(click.style(f"Warning: Could not parse baseline file '{baseline_path}'.", fg="yellow"))
516559

517560
# --- AST Generation for Python files ---
518-
python_files_data = get_python_file_asts(scan_path)
561+
python_files_data = get_python_file_asts(scan_path, enable_syntax_warnings=syntax_warnings)
519562
click.echo(f"[*] Successfully parsed {len(python_files_data)} Python files")
520563

521564
# --- Supply Chain Scanning ---

tests/unit/test_get_asts.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import unittest
2+
import tempfile
3+
import json
4+
from pathlib import Path
5+
from unittest.mock import patch, call
6+
7+
from pyspector.cli import get_python_file_asts
8+
9+
10+
class TestGetPythonFileAsts(unittest.TestCase):
11+
12+
def setUp(self):
13+
# Create a temporary directory structure for tests
14+
self.test_dir = tempfile.TemporaryDirectory()
15+
self.base_path = Path(self.test_dir.name)
16+
17+
# Valid python file
18+
self.valid_file = self.base_path / "valid.py"
19+
self.valid_file.write_text("x = 10", encoding="utf-8")
20+
21+
# Syntax warning file
22+
self.warning_syntax = self.base_path / "warning_err.py"
23+
self.warning_syntax.write_bytes(b'path = "c:\windows"')
24+
25+
# Invalid syntax file
26+
self.invalid_syntax = self.base_path / "syntax_err.py"
27+
self.invalid_syntax.write_text("def broken_function(:", encoding="utf-8")
28+
29+
# Encoding error file
30+
self.encoding_err = self.base_path / "encoding_err.py"
31+
self.encoding_err.write_bytes(b"\xff\xfe\x00\x00")
32+
33+
# Fixture file (should be skipped)
34+
self.fixture_dir = self.base_path / "tests" / "fixtures"
35+
self.fixture_dir.mkdir(parents=True)
36+
self.fixture_file = self.fixture_dir / "fixture_file.py"
37+
self.fixture_file.write_text("y = 20", encoding="utf-8")
38+
39+
def tearDown(self):
40+
self.test_dir.cleanup()
41+
42+
# @patch('pyspector.cli.click.echo')
43+
# @patch('pyspector.cli.click.style', side_effect=lambda msg, fg=None, **kwargs: msg)
44+
def test_get_python_file_asts_handling_default(self):
45+
"""Test that by default SyntaxWarnings are ignored and files are included."""
46+
# Run function with default (enable_syntax_warnings=False)
47+
results = get_python_file_asts(self.base_path)
48+
49+
# We expect BOTH the valid python file AND the warning file to be in the result
50+
# because the warning is ignored and parsing proceeds.
51+
self.assertEqual(len(results), 2)
52+
filenames = [r["file_path"] for r in results]
53+
self.assertIn("valid.py", filenames)
54+
self.assertIn("warning_err.py", filenames)
55+
56+
def test_get_python_file_asts_handling_enabled(self):
57+
"""Test that when enabled, SyntaxWarnings are treated as errors and files are excluded."""
58+
# Run function with enable_syntax_warnings=True
59+
results = get_python_file_asts(self.base_path, enable_syntax_warnings=True)
60+
61+
# We expect ONLY the valid python file to be in the result
62+
# because the warning_err.py triggers an exception and is caught.
63+
self.assertEqual(len(results), 1)
64+
self.assertEqual(results[0]["file_path"], "valid.py")
65+
self.assertEqual(results[0]["content"], "x = 10")
66+
self.assertIn("ast_json", results[0])
67+
68+
# Verify JSON properties exist
69+
ast_obj = json.loads(results[0]["ast_json"])
70+
self.assertEqual(ast_obj["node_type"], "Module")
71+
72+
73+
if __name__ == "__main__":
74+
unittest.main()

0 commit comments

Comments
 (0)