|
| 1 | +"""Static import-integrity checks for the openadapt_capture package. |
| 2 | +
|
| 3 | +Guards against the failure class behind OpenAdaptAI/OpenAdapt#999: |
| 4 | +``from openadapt_capture.cloud.local import serve_dashboard`` parsed fine, |
| 5 | +only exploded at call time, and a bare ``except ImportError`` reported |
| 6 | +it as "openadapt-ml not installed". Imports inside function bodies are |
| 7 | +invisible to plain import-the-module tests, so these checks walk the |
| 8 | +AST instead and need no heavy runtime dependencies. |
| 9 | +
|
| 10 | +Two checks: |
| 11 | +
|
| 12 | +1. test_no_phantom_imports — every ``from openadapt_capture.x import y`` |
| 13 | + anywhere in the package (including inside functions) names something |
| 14 | + that actually exists in the target module. |
| 15 | +2. test_no_phantom_kwargs — every call to a function imported from an |
| 16 | + internal module passes only keyword arguments that exist in that |
| 17 | + function's signature. Conservative: decorated functions, classes, |
| 18 | + and functions taking **kwargs are skipped. |
| 19 | +""" |
| 20 | + |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import ast |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +PACKAGE_NAME = "openadapt_capture" |
| 27 | +PACKAGE_ROOT = Path(__file__).resolve().parent.parent / PACKAGE_NAME |
| 28 | + |
| 29 | +# Known-acceptable exceptions, as (module, imported_name). Keep empty |
| 30 | +# unless a module defines names dynamically in a way the AST walk |
| 31 | +# cannot see. |
| 32 | +PHANTOM_IMPORT_ALLOWLIST: set[tuple[str, str]] = set() |
| 33 | + |
| 34 | + |
| 35 | +# --------------------------------------------------------------------------- |
| 36 | +# Module discovery |
| 37 | +# --------------------------------------------------------------------------- |
| 38 | + |
| 39 | + |
| 40 | +def _module_map() -> dict[str, Path]: |
| 41 | + """Map dotted module names to file paths for the whole package.""" |
| 42 | + modules: dict[str, Path] = {} |
| 43 | + for path in PACKAGE_ROOT.rglob("*.py"): |
| 44 | + rel = path.relative_to(PACKAGE_ROOT.parent) |
| 45 | + parts = list(rel.with_suffix("").parts) |
| 46 | + if parts[-1] == "__init__": |
| 47 | + parts = parts[:-1] |
| 48 | + modules[".".join(parts)] = path |
| 49 | + return modules |
| 50 | + |
| 51 | + |
| 52 | +MODULES = _module_map() |
| 53 | + |
| 54 | + |
| 55 | +# --------------------------------------------------------------------------- |
| 56 | +# Definition collection |
| 57 | +# --------------------------------------------------------------------------- |
| 58 | + |
| 59 | + |
| 60 | +def _collect_defined(tree: ast.Module) -> tuple[set[str], bool]: |
| 61 | + """Names defined at module level, and whether the module is dynamic. |
| 62 | +
|
| 63 | + Walks module-level statements, descending into If/Try/With bodies |
| 64 | + (TYPE_CHECKING guards, optional-import fallbacks) but not into |
| 65 | + function or class bodies. A module is "dynamic" if it star-imports |
| 66 | + or defines module-level __getattr__; we skip checking those. |
| 67 | + """ |
| 68 | + defined: set[str] = set() |
| 69 | + dynamic = False |
| 70 | + |
| 71 | + def visit_body(body: list[ast.stmt]) -> None: |
| 72 | + nonlocal dynamic |
| 73 | + for node in body: |
| 74 | + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): |
| 75 | + defined.add(node.name) |
| 76 | + if node.name == "__getattr__": |
| 77 | + dynamic = True |
| 78 | + elif isinstance(node, ast.ClassDef): |
| 79 | + defined.add(node.name) |
| 80 | + elif isinstance(node, ast.Assign): |
| 81 | + for target in node.targets: |
| 82 | + for name_node in ast.walk(target): |
| 83 | + if isinstance(name_node, ast.Name): |
| 84 | + defined.add(name_node.id) |
| 85 | + elif isinstance(node, (ast.AnnAssign, ast.AugAssign)): |
| 86 | + if isinstance(node.target, ast.Name): |
| 87 | + defined.add(node.target.id) |
| 88 | + elif isinstance(node, ast.Import): |
| 89 | + for alias in node.names: |
| 90 | + defined.add((alias.asname or alias.name).split(".")[0]) |
| 91 | + elif isinstance(node, ast.ImportFrom): |
| 92 | + for alias in node.names: |
| 93 | + if alias.name == "*": |
| 94 | + dynamic = True |
| 95 | + else: |
| 96 | + defined.add(alias.asname or alias.name) |
| 97 | + elif isinstance(node, (ast.If, ast.Try, ast.With)): |
| 98 | + for sub in ast.iter_child_nodes(node): |
| 99 | + if isinstance(sub, list): |
| 100 | + continue |
| 101 | + visit_body(getattr(node, "body", [])) |
| 102 | + visit_body(getattr(node, "orelse", [])) |
| 103 | + visit_body(getattr(node, "finalbody", [])) |
| 104 | + for handler in getattr(node, "handlers", []): |
| 105 | + visit_body(handler.body) |
| 106 | + |
| 107 | + visit_body(tree.body) |
| 108 | + return defined, dynamic |
| 109 | + |
| 110 | + |
| 111 | +def _parse(path: Path) -> ast.Module: |
| 112 | + return ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) |
| 113 | + |
| 114 | + |
| 115 | +_DEFINED_CACHE: dict[str, tuple[set[str], bool]] = {} |
| 116 | + |
| 117 | + |
| 118 | +def _defined_in(module: str) -> tuple[set[str], bool] | None: |
| 119 | + """Defined names for a module in the package, or None if not ours.""" |
| 120 | + if module not in MODULES: |
| 121 | + return None |
| 122 | + if module not in _DEFINED_CACHE: |
| 123 | + _DEFINED_CACHE[module] = _collect_defined(_parse(MODULES[module])) |
| 124 | + return _DEFINED_CACHE[module] |
| 125 | + |
| 126 | + |
| 127 | +def _resolve_relative(current_module: str, node: ast.ImportFrom) -> str | None: |
| 128 | + """Resolve a (possibly relative) ImportFrom to a dotted module name.""" |
| 129 | + if node.level == 0: |
| 130 | + return node.module |
| 131 | + parts = current_module.split(".") |
| 132 | + # level=1 from a module means its containing package; packages |
| 133 | + # (__init__) count as themselves. |
| 134 | + if MODULES.get(current_module, Path()).name != "__init__.py": |
| 135 | + parts = parts[:-1] |
| 136 | + cut = node.level - 1 |
| 137 | + if cut: |
| 138 | + parts = parts[:-cut] if cut <= len(parts) else [] |
| 139 | + base = ".".join(parts) |
| 140 | + if node.module: |
| 141 | + return f"{base}.{node.module}" if base else node.module |
| 142 | + return base or None |
| 143 | + |
| 144 | + |
| 145 | +# --------------------------------------------------------------------------- |
| 146 | +# Check 1: phantom imports |
| 147 | +# --------------------------------------------------------------------------- |
| 148 | + |
| 149 | + |
| 150 | +def test_no_phantom_imports(): |
| 151 | + problems: list[str] = [] |
| 152 | + |
| 153 | + for current, path in sorted(MODULES.items()): |
| 154 | + tree = _parse(path) |
| 155 | + for node in ast.walk(tree): |
| 156 | + if not isinstance(node, ast.ImportFrom): |
| 157 | + continue |
| 158 | + target = _resolve_relative(current, node) |
| 159 | + if not target or not ( |
| 160 | + target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".") |
| 161 | + ): |
| 162 | + continue |
| 163 | + info = _defined_in(target) |
| 164 | + if info is None: |
| 165 | + # Importing from a module we can't find at all. |
| 166 | + if target in MODULES or f"{target}.__init__" in MODULES: |
| 167 | + continue |
| 168 | + problems.append( |
| 169 | + f"{path}:{node.lineno}: imports from missing module '{target}'" |
| 170 | + ) |
| 171 | + continue |
| 172 | + defined, dynamic = info |
| 173 | + if dynamic: |
| 174 | + continue |
| 175 | + for alias in node.names: |
| 176 | + if alias.name == "*": |
| 177 | + continue |
| 178 | + if alias.name in defined: |
| 179 | + continue |
| 180 | + # Importing a submodule: from openadapt_capture.cloud import local |
| 181 | + if f"{target}.{alias.name}" in MODULES: |
| 182 | + continue |
| 183 | + if (target, alias.name) in PHANTOM_IMPORT_ALLOWLIST: |
| 184 | + continue |
| 185 | + problems.append( |
| 186 | + f"{path}:{node.lineno}: 'from {target} import " |
| 187 | + f"{alias.name}' but '{alias.name}' is not defined in " |
| 188 | + f"{MODULES[target]}" |
| 189 | + ) |
| 190 | + |
| 191 | + assert not problems, ( |
| 192 | + "Phantom imports detected (names imported from internal modules " |
| 193 | + "that do not exist there). These typically only explode at call " |
| 194 | + "time and get masked by 'except ImportError':\n " + "\n ".join(problems) |
| 195 | + ) |
| 196 | + |
| 197 | + |
| 198 | +# --------------------------------------------------------------------------- |
| 199 | +# Check 2: phantom keyword arguments |
| 200 | +# --------------------------------------------------------------------------- |
| 201 | + |
| 202 | + |
| 203 | +def _function_params(module: str, func_name: str) -> set[str] | None: |
| 204 | + """Param names of an undecorated top-level function, else None. |
| 205 | +
|
| 206 | + None means "cannot safely check" (missing, decorated, a class, |
| 207 | + has **kwargs, or module is dynamic). |
| 208 | + """ |
| 209 | + info = _defined_in(module) |
| 210 | + if info is None or info[1]: |
| 211 | + return None |
| 212 | + tree = _parse(MODULES[module]) |
| 213 | + for node in tree.body: |
| 214 | + if ( |
| 215 | + isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) |
| 216 | + and node.name == func_name |
| 217 | + ): |
| 218 | + if node.decorator_list or node.args.kwarg is not None: |
| 219 | + return None |
| 220 | + params = [a.arg for a in node.args.posonlyargs] |
| 221 | + params += [a.arg for a in node.args.args] |
| 222 | + params += [a.arg for a in node.args.kwonlyargs] |
| 223 | + return set(params) |
| 224 | + return None |
| 225 | + |
| 226 | + |
| 227 | +def test_no_phantom_kwargs(): |
| 228 | + problems: list[str] = [] |
| 229 | + |
| 230 | + for current, path in sorted(MODULES.items()): |
| 231 | + tree = _parse(path) |
| 232 | + |
| 233 | + # local alias -> (target_module, original_name), from ALL |
| 234 | + # ImportFroms in the file, including inside function bodies. |
| 235 | + imported: dict[str, tuple[str, str]] = {} |
| 236 | + for node in ast.walk(tree): |
| 237 | + if isinstance(node, ast.ImportFrom): |
| 238 | + target = _resolve_relative(current, node) |
| 239 | + if target and ( |
| 240 | + target == PACKAGE_NAME or target.startswith(PACKAGE_NAME + ".") |
| 241 | + ): |
| 242 | + for alias in node.names: |
| 243 | + if alias.name != "*": |
| 244 | + imported[alias.asname or alias.name] = ( |
| 245 | + target, |
| 246 | + alias.name, |
| 247 | + ) |
| 248 | + |
| 249 | + if not imported: |
| 250 | + continue |
| 251 | + |
| 252 | + for node in ast.walk(tree): |
| 253 | + if not isinstance(node, ast.Call): |
| 254 | + continue |
| 255 | + if not isinstance(node.func, ast.Name): |
| 256 | + continue |
| 257 | + if node.func.id not in imported: |
| 258 | + continue |
| 259 | + target_module, original = imported[node.func.id] |
| 260 | + params = _function_params(target_module, original) |
| 261 | + if params is None: |
| 262 | + continue |
| 263 | + for kw in node.keywords: |
| 264 | + if kw.arg is not None and kw.arg not in params: |
| 265 | + problems.append( |
| 266 | + f"{path}:{node.lineno}: call to " |
| 267 | + f"{target_module}.{original}(... {kw.arg}=...) but " |
| 268 | + f"its parameters are {sorted(params)}" |
| 269 | + ) |
| 270 | + |
| 271 | + assert not problems, ( |
| 272 | + "Keyword arguments passed to internal functions that do not " |
| 273 | + "accept them (TypeError at call time):\n " + "\n ".join(problems) |
| 274 | + ) |
0 commit comments