Skip to content

Commit 0e22eed

Browse files
Lukas Geiger and claude committed
fix: correct 6 AST analysis false positives in import/name tracking
- Fix A: auto_fix_unused_imports reads files with an encoding fallback (UTF-8 first, latin-1 on UnicodeDecodeError), using readlines() to preserve line-number alignment with AST lineno
- Fix B: analyze_project catches UnicodeDecodeError alongside IOError/OSError
- Fix C: backup and output writes use the detected encoding to avoid corrupting latin-1 files with non-ASCII content
- Fix D: _collect_unused_import_lines skips __future__ imports to prevent removing 'from __future__ import annotations' with PEP 563 semantics
- Fix E: CodeAnalyzer.visit_ExceptHandler adds except-binding names to local_names — ExceptHandler.name is a str, not visited by visit_Name
- Fix F: analyze_source filters module-level dunders from missing_imports — __file__/__name__/__doc__ are implicitly available, not in builtins

Add 9 regression tests (23 total, all passing).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 57e9644 commit 0e22eed

2 files changed

Lines changed: 215 additions & 16 deletions

File tree

MethodenAnalyser3.py

Lines changed: 39 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -391,6 +391,12 @@ def visit_arg(self, node: ast.arg) -> None:
391391
self.local_names.add(node.arg)
392392
self.generic_visit(node)
393393

394+
def visit_ExceptHandler(self, node: ast.ExceptHandler) -> None:
    """Record the name bound by an ``except ... as name`` clause as a local name.

    ``ExceptHandler.name`` is a plain ``str`` (or ``None`` when the clause has
    no ``as`` binding) rather than an ``ast.Name`` node, so it is not picked up
    by ``visit_Name`` and has to be registered here explicitly.

    Args:
        node: The ``except`` handler being visited.
    """
    if node.name:
        self.local_names.add(node.name)
    # Continue into the handler so its exception type and body are visited too.
    self.generic_visit(node)
399+
394400

395401
# ============================================================================
396402
# HILFSFUNKTIONEN
@@ -697,6 +703,12 @@ def analyze_source(code: str, source_name: str = "<snippet>") -> AnalysisResult:
697703
analyzer.used_names - defs - imports_unique -
698704
calls - analyzer.local_names - BUILTINS - framework_and_widgets - module_provided_attrs
699705
)
706+
# Module-level Dunders (__file__, __name__, __doc__ etc.) sind implizit
707+
# verfügbar, aber nicht in dir(builtins) — Falsch-Positive herausfiltern
708+
missing_imports = {
709+
name for name in missing_imports
710+
if not (name.startswith("__") and name.endswith("__"))
711+
}
700712

701713
return AnalysisResult(
702714
calls=sorted(calls),
@@ -1100,6 +1112,10 @@ def _collect_unused_import_lines(tree: ast.AST, unused_set: Set[str]) -> Set[int
11001112
lines_to_remove: Set[int] = set()
11011113
for node in ast.walk(tree):
11021114
if isinstance(node, (ast.Import, ast.ImportFrom)):
1115+
# __future__-Imports niemals entfernen — sie aendern Python-Semantik
1116+
# (z.B. 'from __future__ import annotations' aktiviert PEP 563)
1117+
if isinstance(node, ast.ImportFrom) and node.module == "__future__":
1118+
continue
11031119
names = [alias.asname or alias.name.split(".")[0] for alias in node.names
11041120
if alias.name != "*"]
11051121
if names and all(name in unused_set for name in names):
@@ -1133,31 +1149,38 @@ def auto_fix_unused_imports(output_widget: scrolledtext.ScrolledText) -> None:
11331149
return
11341150

11351151
try:
1136-
# Datei lesen
1137-
with open(_last_analysis_path, "r", encoding="utf-8") as f:
1138-
lines = f.readlines()
1139-
1140-
# AST parsen um Import-Zeilen zu finden
1141-
with open(_last_analysis_path, "r", encoding="utf-8") as f:
1142-
tree = ast.parse(f.read())
1143-
1152+
# Datei lesen mit Encoding-Fallback — erkanntes Encoding für Schreibzugriff merken
1153+
detected_encoding = "utf-8"
1154+
try:
1155+
with open(_last_analysis_path, "r", encoding="utf-8") as f:
1156+
lines = f.readlines()
1157+
except UnicodeDecodeError:
1158+
detected_encoding = "latin-1"
1159+
with open(_last_analysis_path, "r", encoding="latin-1") as f:
1160+
lines = f.readlines()
1161+
1162+
# AST parsen (readlines() beibehalten — splitlines() würde bei \x0c
1163+
# Zeilennummern gegenüber AST-lineno verschieben und falsche Zeilen löschen)
1164+
tree = ast.parse("".join(lines))
1165+
11441166
# Import-Zeilen markieren die entfernt werden sollen
11451167
unused_set = set(_last_analysis_result.unused_imports)
11461168
lines_to_remove = _collect_unused_import_lines(tree, unused_set)
1147-
1169+
11481170
if not lines_to_remove:
11491171
messagebox.showinfo("Info", "Keine vollständig ungenutzten Import-Zeilen gefunden.\n(Teilweise genutzte Imports müssen manuell bearbeitet werden)")
11501172
return
1151-
1152-
# Backup erstellen
1173+
1174+
# Backup und Ausgabe im erkannten Encoding — verhindert Korrumpierung von
1175+
# latin-1-Dateien mit nicht-ASCII-Zeichen und # coding: latin-1 Deklaration
11531176
backup_path = _last_analysis_path + ".bak"
1154-
with open(backup_path, "w", encoding="utf-8") as f:
1177+
with open(backup_path, "w", encoding=detected_encoding) as f:
11551178
f.writelines(lines)
1156-
1179+
11571180
# Neue Datei ohne ungenutzte Imports
11581181
new_lines = [line for i, line in enumerate(lines, 1) if i not in lines_to_remove]
1159-
1160-
with open(_last_analysis_path, "w", encoding="utf-8") as f:
1182+
1183+
with open(_last_analysis_path, "w", encoding=detected_encoding) as f:
11611184
f.writelines(new_lines)
11621185

11631186
# Ausgabe
@@ -1235,7 +1258,7 @@ def analyze_project(folder_path: str, progress_callback=None) -> ProjectAnalysis
12351258
try:
12361259
with open(file_path, 'r', encoding='utf-8') as f:
12371260
total_lines += len(f.readlines())
1238-
except (IOError, OSError):
1261+
except (IOError, OSError, UnicodeDecodeError):
12391262
pass
12401263
rel_path = os.path.relpath(file_path, folder_path)
12411264
if result.unused_imports:

tests/test_cli.py

Lines changed: 176 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,182 @@ def test_collect_unused_import_lines_handles_dotted_imports(self) -> None:
360360
"import os.path muss als entfernbar markiert werden wenn 'os' ungenutzt ist",
361361
)
362362

363+
def test_collect_unused_import_lines_keeps_future_imports(self) -> None:
    """Regression (bug D): ``from __future__ import annotations`` must not be removed.

    The future import has to be kept even when ``annotations`` is reported as
    unused, while an ordinary unused import is still marked for removal.
    """
    import ast as _ast
    sys.path.insert(0, str(PROJECT_ROOT))
    from MethodenAnalyser3 import _collect_unused_import_lines

    code = "from __future__ import annotations\nimport os\nx = 1\n"
    tree = _ast.parse(code)
    lines_to_remove = _collect_unused_import_lines(tree, {"annotations", "os"})

    # Line 1 is the __future__ import, line 2 the plain ``import os``.
    self.assertNotIn(1, lines_to_remove, "__future__-Import darf nicht entfernt werden")
    self.assertIn(2, lines_to_remove, "normaler unbenutzter Import muss markiert werden")
376+
377+
378+
class TestEncodingHandling(unittest.TestCase):
    """Tests for the encoding fallback on non-UTF-8 (latin-1) files."""

    def setUp(self):
        # Make the project importable and give each test its own scratch directory.
        sys.path.insert(0, str(PROJECT_ROOT))
        self.tmpdir = tempfile.mkdtemp()

    def tearDown(self):
        import shutil
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_analyze_project_latin1_file_not_in_errors(self) -> None:
        """Regression (bug B): a latin-1 file must not be listed in files_with_errors.

        analyze_project() is expected to analyse the file successfully via the
        encoding fallback and count it as analysed.
        """
        from MethodenAnalyser3 import analyze_project

        latin1_code = b"# encoding: latin-1\nimport os\nx = 'caf\xe9'\n"
        file_path = os.path.join(self.tmpdir, "latin1_file.py")
        with open(file_path, "wb") as f:
            f.write(latin1_code)

        result = analyze_project(self.tmpdir)

        error_paths = [e[0] for e in result.files_with_errors]
        # NOTE(review): analyze_project derives a path relative to the scanned
        # folder; which form files_with_errors stores is not visible from this
        # test, so both are checked to keep the assertion from passing vacuously.
        for candidate in (file_path, os.path.relpath(file_path, self.tmpdir)):
            self.assertNotIn(
                candidate,
                error_paths,
                "latin-1-Datei darf nicht in files_with_errors stehen wenn Analyse erfolgreich war",
            )
        self.assertEqual(result.files_analyzed, 1)

    def _call_auto_fix(self, filepath, result):
        """Run auto_fix_unused_imports against ``filepath`` with a mocked GUI.

        Points the module globals ``_last_analysis_path`` and
        ``_last_analysis_result`` at the given values, answers the confirmation
        dialog with "yes", and restores the globals afterwards.
        """
        import unittest.mock
        import MethodenAnalyser3 as m3
        orig_path = m3._last_analysis_path
        orig_result = m3._last_analysis_result
        try:
            m3._last_analysis_path = filepath
            m3._last_analysis_result = result
            with unittest.mock.patch("MethodenAnalyser3.messagebox") as mb:
                mb.askyesno.return_value = True
                m3.auto_fix_unused_imports(unittest.mock.MagicMock())
        finally:
            m3._last_analysis_path = orig_path
            m3._last_analysis_result = orig_result

    def test_auto_fix_works_on_latin1_file(self) -> None:
        """Regression (bug A): auto_fix_unused_imports() must handle latin-1 files.

        It must not crash with UnicodeDecodeError and must remove the unused
        import while keeping the used one.
        """
        import MethodenAnalyser3 as m3

        latin1_code = b"import os\nimport sys\nx = 'caf\xe9'\nprint(sys.argv)\n"
        filepath = os.path.join(self.tmpdir, "latin1_autofix.py")
        with open(filepath, "wb") as f:
            f.write(latin1_code)

        result = m3.analyze_file(filepath)
        self.assertIn("os", result.unused_imports)

        self._call_auto_fix(filepath, result)

        with open(filepath, "r", encoding="latin-1") as f:
            content = f.read()
        self.assertNotIn("import os\n", content)
        self.assertIn("import sys\n", content)

    def test_auto_fix_form_feed_line_alignment(self) -> None:
        """Regression (fix A): a form feed \\x0c must not shift AST line numbers.

        splitlines() would produce an extra line at \\x0c, readlines() does not.
        """
        import MethodenAnalyser3 as m3

        # \x0c before ``import os``: splitlines() would see line 1 = empty and
        # line 2 = ``import os``, whereas the AST reports lineno=1 for
        # ``import os`` - readlines() stays consistent with the AST.
        code = b"\x0cimport os\nimport sys\nprint(os.getcwd())\n"
        filepath = os.path.join(self.tmpdir, "formfeed_autofix.py")
        with open(filepath, "wb") as f:
            f.write(code)

        result = m3.analyze_file(filepath)
        self.assertIn("sys", result.unused_imports)
        self.assertNotIn("os", result.unused_imports)

        self._call_auto_fix(filepath, result)

        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()
        self.assertNotIn("import sys", content)
        self.assertIn("import os", content)

    def test_auto_fix_preserves_latin1_encoding_for_non_ascii_content(self) -> None:
        """Regression (bug C): auto_fix must not re-encode latin-1 files as UTF-8.

        Re-encoding would corrupt files that declare ``# coding: latin-1`` and
        contain non-ASCII characters.
        """
        import MethodenAnalyser3 as m3

        # File with a latin-1 non-ASCII character (café = caf + \xe9).
        latin1_code = b"import os\nimport sys\nx = 'caf\xe9'\nprint(sys.argv)\n"
        filepath = os.path.join(self.tmpdir, "latin1_nonascii.py")
        with open(filepath, "wb") as f:
            f.write(latin1_code)

        result = m3.analyze_file(filepath)
        self.assertIn("os", result.unused_imports)

        self._call_auto_fix(filepath, result)

        # Inspect the raw bytes: decoding as latin-1 can never fail, so only a
        # byte-level comparison can reveal a change of encoding.
        with open(filepath, "rb") as f:
            raw_bytes = f.read()
        # The non-ASCII byte \xe9 (é in latin-1) must still be present.
        self.assertIn(b"\xe9", raw_bytes, "latin-1 Byte \\xe9 darf nach auto_fix nicht fehlen")
        # It must NOT have been rewritten as the UTF-8 sequence \xc3\xa9.
        self.assertNotIn(b"\xc3\xa9", raw_bytes, "Encoding darf nicht von latin-1 auf UTF-8 geaendert worden sein")
491+
492+
493+
class TestExceptHandlerAndDunders(unittest.TestCase):
    """Tests for bug E (ExceptHandler binding) and bug F (module dunders)."""

    def setUp(self):
        # Make the project root importable for the in-test imports below.
        sys.path.insert(0, str(PROJECT_ROOT))

    def test_except_binding_not_in_missing_imports(self) -> None:
        """Regression (bug E): ``except Exception as e:`` must not flag ``e``.

        ``ExceptHandler.name`` is a ``str``, not an ``ast.Name`` node, so the
        bound name must not be reported as a missing import.
        """
        from MethodenAnalyser3 import analyze_source

        code = textwrap.dedent("""
            import sys

            def run():
                try:
                    pass
                except Exception as e:
                    print(e)
        """).strip() + "\n"

        result = analyze_source(code)
        self.assertNotIn(
            "e",
            result.missing_imports,
            "Exception-Binding 'e' darf nicht als missing_import erscheinen",
        )

    def test_module_dunders_not_in_missing_imports(self) -> None:
        """Regression (bug F): ``__file__``, ``__name__`` and ``__doc__`` are implicit.

        They are available in every module and must not show up in
        ``missing_imports``.
        """
        from MethodenAnalyser3 import analyze_source

        code = textwrap.dedent("""
            def info():
                print(__file__, __name__, __doc__)
        """).strip() + "\n"

        result = analyze_source(code)
        for dunder in ("__file__", "__name__", "__doc__"):
            # subTest reports every offending dunder instead of stopping at the first.
            with self.subTest(dunder=dunder):
                self.assertNotIn(
                    dunder,
                    result.missing_imports,
                    f"{dunder} ist implizit verfuegbar und darf nicht in missing_imports stehen",
                )
538+
363539

364540
class TranslatorIsGermanTests(unittest.TestCase):
365541
"""Tests für TranslationSystem._is_german()."""

0 commit comments

Comments
 (0)