fix: correct 6 AST analysis false positives in import/name tracking

Lukas Geiger · claude · Lukas Geiger · commit 0e22eedad714 · 2026-06-10T12:44:05.000+02:00
- Fix A: auto_fix_unused_imports reads files with encoding fallback
  (UTF-8 first, latin-1 on UnicodeDecodeError) using readlines() to
  preserve line number alignment with AST lineno
- Fix B: analyze_project catches UnicodeDecodeError alongside IOError/OSError
- Fix C: backup and output writes use detected encoding to avoid corrupting
  latin-1 files with non-ASCII content
- Fix D: _collect_unused_import_lines skips __future__ imports to prevent
  removing 'from __future__ import annotations' with PEP 563 semantics
- Fix E: CodeAnalyzer.visit_ExceptHandler adds except-binding names to
  local_names — ExceptHandler.name is a str, not visited by visit_Name
- Fix F: analyze_source filters module-level dunders from missing_imports
  — __file__/__name__/__doc__ are implicitly available, not in builtins

Add 9 regression tests (23 total, all passing).

Co-Authored-By: Claude Sonnet 4.6 &lt;noreply@anthropic.com&gt;
diff --git a/MethodenAnalyser3.py b/MethodenAnalyser3.py
@@ -391,6 +391,12 @@ def visit_arg(self, node: ast.arg) -> None:
         self.local_names.add(node.arg)
         self.generic_visit(node)
 
+    def visit_ExceptHandler(self, node: ast.ExceptHandler) -> None:
+        """Erfasst Exception-Binding-Namen als lokale Namen."""
+        if node.name:
+            self.local_names.add(node.name)
+        self.generic_visit(node)
+
 
 # ============================================================================
 # HILFSFUNKTIONEN
@@ -697,6 +703,12 @@ def analyze_source(code: str, source_name: str = "<snippet>") -> AnalysisResult:
         analyzer.used_names - defs - imports_unique -
         calls - analyzer.local_names - BUILTINS - framework_and_widgets - module_provided_attrs
     )
+    # Module-level Dunders (__file__, __name__, __doc__ etc.) sind implizit
+    # verfügbar, aber nicht in dir(builtins) — Falsch-Positive herausfiltern
+    missing_imports = {
+        name for name in missing_imports
+        if not (name.startswith("__") and name.endswith("__"))
+    }
 
     return AnalysisResult(
         calls=sorted(calls),
@@ -1100,6 +1112,10 @@ def _collect_unused_import_lines(tree: ast.AST, unused_set: Set[str]) -> Set[int
     lines_to_remove: Set[int] = set()
     for node in ast.walk(tree):
         if isinstance(node, (ast.Import, ast.ImportFrom)):
+            # __future__-Imports niemals entfernen — sie aendern Python-Semantik
+            # (z.B. 'from __future__ import annotations' aktiviert PEP 563)
+            if isinstance(node, ast.ImportFrom) and node.module == "__future__":
+                continue
             names = [alias.asname or alias.name.split(".")[0] for alias in node.names
                      if alias.name != "*"]
             if names and all(name in unused_set for name in names):
@@ -1133,31 +1149,38 @@ def auto_fix_unused_imports(output_widget: scrolledtext.ScrolledText) -> None:
         return
     
     try:
-        # Datei lesen
-        with open(_last_analysis_path, "r", encoding="utf-8") as f:
-            lines = f.readlines()
-        
-        # AST parsen um Import-Zeilen zu finden
-        with open(_last_analysis_path, "r", encoding="utf-8") as f:
-            tree = ast.parse(f.read())
-        
+        # Datei lesen mit Encoding-Fallback — erkanntes Encoding für Schreibzugriff merken
+        detected_encoding = "utf-8"
+        try:
+            with open(_last_analysis_path, "r", encoding="utf-8") as f:
+                lines = f.readlines()
+        except UnicodeDecodeError:
+            detected_encoding = "latin-1"
+            with open(_last_analysis_path, "r", encoding="latin-1") as f:
+                lines = f.readlines()
+
+        # AST parsen (readlines() beibehalten — splitlines() würde bei \x0c
+        # Zeilennummern gegenüber AST-lineno verschieben und falsche Zeilen löschen)
+        tree = ast.parse("".join(lines))
+
         # Import-Zeilen markieren die entfernt werden sollen
         unused_set = set(_last_analysis_result.unused_imports)
         lines_to_remove = _collect_unused_import_lines(tree, unused_set)
-        
+
         if not lines_to_remove:
             messagebox.showinfo("Info", "Keine vollständig ungenutzten Import-Zeilen gefunden.\n(Teilweise genutzte Imports müssen manuell bearbeitet werden)")
             return
-        
-        # Backup erstellen
+
+        # Backup und Ausgabe im erkannten Encoding — verhindert Korrumpierung von
+        # latin-1-Dateien mit nicht-ASCII-Zeichen und # coding: latin-1 Deklaration
         backup_path = _last_analysis_path + ".bak"
-        with open(backup_path, "w", encoding="utf-8") as f:
+        with open(backup_path, "w", encoding=detected_encoding) as f:
             f.writelines(lines)
-        
+
         # Neue Datei ohne ungenutzte Imports
         new_lines = [line for i, line in enumerate(lines, 1) if i not in lines_to_remove]
-        
-        with open(_last_analysis_path, "w", encoding="utf-8") as f:
+
+        with open(_last_analysis_path, "w", encoding=detected_encoding) as f:
             f.writelines(new_lines)
         
         # Ausgabe
@@ -1235,7 +1258,7 @@ def analyze_project(folder_path: str, progress_callback=None) -> ProjectAnalysis
             try:
                 with open(file_path, 'r', encoding='utf-8') as f:
                     total_lines += len(f.readlines())
-            except (IOError, OSError):
+            except (IOError, OSError, UnicodeDecodeError):
                 pass
             rel_path = os.path.relpath(file_path, folder_path)
             if result.unused_imports:
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -360,6 +360,182 @@ def test_collect_unused_import_lines_handles_dotted_imports(self) -> None:
             "import os.path muss als entfernbar markiert werden wenn 'os' ungenutzt ist",
         )
 
+    def test_collect_unused_import_lines_keeps_future_imports(self) -> None:
+        """Regression (Bug D): from __future__ import annotations darf nicht entfernt
+        werden, auch wenn 'annotations' nicht explizit als Name genutzt wird."""
+        import ast as _ast
+        sys.path.insert(0, str(PROJECT_ROOT))
+        from MethodenAnalyser3 import _collect_unused_import_lines
+
+        code = "from __future__ import annotations\nimport os\nx = 1\n"
+        tree = _ast.parse(code)
+        lines_to_remove = _collect_unused_import_lines(tree, {"annotations", "os"})
+
+        self.assertNotIn(1, lines_to_remove, "__future__-Import darf nicht entfernt werden")
+        self.assertIn(2, lines_to_remove, "normaler unbenutzter Import muss markiert werden")
+
+
+class TestEncodingHandling(unittest.TestCase):
+    """Tests für Encoding-Fallback bei nicht-UTF-8-Dateien (Latin-1)."""
+
+    def setUp(self):
+        sys.path.insert(0, str(PROJECT_ROOT))
+        self.tmpdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        import shutil
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+    def test_analyze_project_latin1_file_not_in_errors(self) -> None:
+        """Regression (Bug B): analyze_project() darf latin-1-Dateien nicht in
+        files_with_errors listen, wenn die Analyse per Encoding-Fallback erfolgreich war."""
+        from MethodenAnalyser3 import analyze_project
+
+        latin1_code = b"# encoding: latin-1\nimport os\nx = 'caf\xe9'\n"
+        file_path = os.path.join(self.tmpdir, "latin1_file.py")
+        with open(file_path, "wb") as f:
+            f.write(latin1_code)
+
+        result = analyze_project(self.tmpdir)
+
+        error_paths = [e[0] for e in result.files_with_errors]
+        self.assertNotIn(
+            file_path,
+            error_paths,
+            "latin-1-Datei darf nicht in files_with_errors stehen wenn Analyse erfolgreich war",
+        )
+        self.assertEqual(result.files_analyzed, 1)
+
+    def _call_auto_fix(self, filepath, result):
+        """Setzt Globals, ruft auto_fix_unused_imports mit gemockter GUI auf."""
+        import unittest.mock
+        import MethodenAnalyser3 as m3
+        orig_path = m3._last_analysis_path
+        orig_result = m3._last_analysis_result
+        try:
+            m3._last_analysis_path = filepath
+            m3._last_analysis_result = result
+            with unittest.mock.patch("MethodenAnalyser3.messagebox") as mb:
+                mb.askyesno.return_value = True
+                m3.auto_fix_unused_imports(unittest.mock.MagicMock())
+        finally:
+            m3._last_analysis_path = orig_path
+            m3._last_analysis_result = orig_result
+
+    def test_auto_fix_works_on_latin1_file(self) -> None:
+        """Regression (Bug A): auto_fix_unused_imports() darf bei latin-1-Dateien
+        nicht mit UnicodeDecodeError abstuerzen und muss den Import korrekt entfernen."""
+        import MethodenAnalyser3 as m3
+
+        latin1_code = b"import os\nimport sys\nx = 'caf\xe9'\nprint(sys.argv)\n"
+        filepath = os.path.join(self.tmpdir, "latin1_autofix.py")
+        with open(filepath, "wb") as f:
+            f.write(latin1_code)
+
+        result = m3.analyze_file(filepath)
+        self.assertIn("os", result.unused_imports)
+
+        self._call_auto_fix(filepath, result)
+
+        with open(filepath, "r", encoding="latin-1") as f:
+            content = f.read()
+        self.assertNotIn("import os\n", content)
+        self.assertIn("import sys\n", content)
+
+    def test_auto_fix_form_feed_line_alignment(self) -> None:
+        """Regression (Fix A): Form-Feed \\x0c darf AST-Zeilennummern nicht verschieben
+        — splitlines() wuerde bei \\x0c extra Zeilen erzeugen, readlines() nicht."""
+        import MethodenAnalyser3 as m3
+
+        # \x0c vor import os: splitlines() wuerde Zeile 1=leer, 2=import os sehen,
+        # AST sieht aber lineno=1 fuer import os — readlines() bleibt konsistent.
+        code = b"\x0cimport os\nimport sys\nprint(os.getcwd())\n"
+        filepath = os.path.join(self.tmpdir, "formfeed_autofix.py")
+        with open(filepath, "wb") as f:
+            f.write(code)
+
+        result = m3.analyze_file(filepath)
+        self.assertIn("sys", result.unused_imports)
+        self.assertNotIn("os", result.unused_imports)
+
+        self._call_auto_fix(filepath, result)
+
+        with open(filepath, "r", encoding="utf-8") as f:
+            content = f.read()
+        self.assertNotIn("import sys", content)
+        self.assertIn("import os", content)
+
+    def test_auto_fix_preserves_latin1_encoding_for_non_ascii_content(self) -> None:
+        """Regression (Bug C): auto_fix darf bei latin-1-Dateien das Encoding nicht auf
+        UTF-8 aendern — wuerde Dateien mit '# coding: latin-1' und nicht-ASCII korrumpieren."""
+        import MethodenAnalyser3 as m3
+
+        # Datei mit latin-1 Nicht-ASCII-Zeichen (café = caf + \xe9)
+        latin1_code = b"import os\nimport sys\nx = 'caf\xe9'\nprint(sys.argv)\n"
+        filepath = os.path.join(self.tmpdir, "latin1_nonascii.py")
+        with open(filepath, "wb") as f:
+            f.write(latin1_code)
+
+        result = m3.analyze_file(filepath)
+        self.assertIn("os", result.unused_imports)
+
+        self._call_auto_fix(filepath, result)
+
+        # Datei muss weiterhin als latin-1 lesbar sein (kein UnicodeDecodeError)
+        with open(filepath, "rb") as f:
+            raw_bytes = f.read()
+        # Das nicht-ASCII-Byte \xe9 (é in latin-1) muss erhalten bleiben
+        self.assertIn(b"\xe9", raw_bytes, "latin-1 Byte \\xe9 darf nach auto_fix nicht fehlen")
+        # Darf NICHT als UTF-8-Sequenz \xc3\xa9 codiert worden sein
+        self.assertNotIn(b"\xc3\xa9", raw_bytes, "Encoding darf nicht von latin-1 auf UTF-8 geaendert worden sein")
+
+
+class TestExceptHandlerAndDunders(unittest.TestCase):
+    """Tests für Bug E (ExceptHandler-Binding) und Bug F (Module-Dunders)."""
+
+    def setUp(self):
+        sys.path.insert(0, str(PROJECT_ROOT))
+
+    def test_except_binding_not_in_missing_imports(self) -> None:
+        """Regression (Bug E): 'except Exception as e:' darf 'e' nicht als
+        missing_import ausweisen — ExceptHandler.name ist ein str, kein ast.Name-Knoten."""
+        from MethodenAnalyser3 import analyze_source
+
+        code = textwrap.dedent("""
+            import sys
+
+            def run():
+                try:
+                    pass
+                except Exception as e:
+                    print(e)
+        """).strip() + "\n"
+
+        result = analyze_source(code)
+        self.assertNotIn(
+            "e",
+            result.missing_imports,
+            "Exception-Binding 'e' darf nicht als missing_import erscheinen",
+        )
+
+    def test_module_dunders_not_in_missing_imports(self) -> None:
+        """Regression (Bug F): __file__, __name__, __doc__ sind implizit verfuegbar
+        und duerfen nicht als missing_imports erscheinen."""
+        from MethodenAnalyser3 import analyze_source
+
+        code = textwrap.dedent("""
+            def info():
+                print(__file__, __name__, __doc__)
+        """).strip() + "\n"
+
+        result = analyze_source(code)
+        for dunder in ("__file__", "__name__", "__doc__"):
+            self.assertNotIn(
+                dunder,
+                result.missing_imports,
+                f"{dunder} ist implizit verfuegbar und darf nicht in missing_imports stehen",
+            )
+
 
 class TranslatorIsGermanTests(unittest.TestCase):
     """Tests für TranslationSystem._is_german()."""