From 701fa853a3c1713059b231a5751961de096116c1 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 22 Jun 2026 23:10:57 +0300 Subject: [PATCH 01/16] docs: design spec for metadata-driven require(extra) guard (#322) Co-Authored-By: Claude Opus 4.8 --- ...026-06-22-require-extra-resolver-design.md | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md diff --git a/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md b/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md new file mode 100644 index 000000000..156225905 --- /dev/null +++ b/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md @@ -0,0 +1,170 @@ +# Design: metadata-driven `require(extra)` dependency guard + +**Date:** 2026-06-22 +**Issue:** [#322](https://github.com/voorhs/AutoIntent/issues/322) — bert scorer: opaque ImportError when `accelerate` is missing (require() guard checks only `transformers`) +**Status:** Approved, pending implementation plan + +## Problem + +The current `require(dependency, extra=None)` utility (`src/autointent/_utils.py`) tries to +`importlib.import_module(dependency)` and raises an informative `ImportError` naming the pip +extra if the import fails. Two structural weaknesses: + +1. **Manual sync burden.** Each `require` call site hard-codes a module name and an extra label. + These must be kept in sync with `pyproject.toml` by hand. When an extra gains a dependency, + every relevant call site must be updated, and it is easy to miss one. +2. **No version checking.** It only checks *presence* (importability), never whether the installed + version satisfies the constraint declared in `pyproject.toml`. + +Issue #322 is the concrete failure: `BertScorer.__init__` calls `require("transformers", "transformers")`, +which passes whenever `transformers` is importable — even when `accelerate` (pulled by the +`transformers[torch]` extra, required by HF `Trainer`) is absent. The user then hits a raw, deep +`ImportError` from inside `Trainer` instead of AutoIntent's "install `autointent[transformers]`" message. + +## Goal + +Refactor `require` to take an **autointent extra name** and validate that **every** dependency of +that extra — recursively, including nested third-party extras such as `transformers[torch] → accelerate` +— is installed **and** version-satisfied, raising a single aggregated, actionable `ImportError` +otherwise. + +This removes the manual sync burden (single source of truth = the package metadata baked from +`pyproject.toml`), adds version checking, and fixes #322 structurally — with no hand-added +`accelerate` guard. + +## Key technical premise + +`pyproject.toml` is **not** shipped in the wheel and cannot be parsed at runtime. It does not need to +be: the build process bakes the same requirements (with version specifiers and `extra` markers) into +the installed package metadata, which `importlib.metadata` reads at runtime. Verified against the +installed `autointent`: + +``` +transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers' +``` + +and, recursing into `transformers`' own metadata: + +``` +torch>=2.2 ; extra == "torch" +accelerate>=0.26.0 ; extra == "torch" +``` + +So the single source of truth is the installed metadata, available everywhere the package is installed. + +## Decisions (from brainstorming Q&A) + +- **Presence check method:** metadata version check via `importlib.metadata` (not import-based). Faster, + no heavy imports (e.g. torch), enables version checking and recursive extra resolution. +- **Resolution depth:** full recursive walk of nested extras, cycle-guarded. This is what catches the + nested `accelerate` requirement automatically. +- **Rollout:** clean replace — change the signature to `require(extra: str)` and migrate every call + site in this PR. `require` is internal (`autointent._utils`, ~20 call sites, no external contract). + +## Design + +### New module `src/autointent/_deps.py` + +Houses the dependency-resolution logic, keeping it out of `_utils.py` (which imports `torch` at module +load) and making it trivially unit-testable. + +- **`require(extra: str, *, dist: str = "autointent") -> None`** — public entry point. Resolves the + extra's full (recursive) leaf-requirement set and validates each. Raises `ImportError` with an + aggregated message if anything is missing or outdated; returns `None` otherwise. + +- **`_iter_extra_reqs(dist, extra)`** — read `importlib.metadata.requires(dist)` (treat `None` as + empty), parse each entry with `packaging.requirements.Requirement`, and keep those whose `.marker` + evaluates true for `{"extra": extra}` combined with the current environment. `packaging` fills + environment markers (`python_version`, `sys_platform`, …) from the running interpreter, so + platform-conditional dependencies resolve correctly. + +- **`_resolve(dist, extra, seen)`** — yields leaf requirements (those with no further extras) reachable + from `dist`'s `extra`, recursing into nested extras. For a requirement like + `transformers[torch]>=4.49.0,<5.0.0` it yields the `transformers` constraint itself and then recurses + into `transformers`' `torch` extra. Cycle-guarded via `seen: set[tuple[str, frozenset[str]]]`. + +- **`_check(req) -> str | None`** — `importlib.metadata.version(req.name)`; return a human-readable + problem string if the dist is absent (`PackageNotFoundError`) or the installed version is not in + `req.specifier`; otherwise `None`. + +- Distribution/extra names normalized with `packaging.utils.canonicalize_name` on both sides for + hyphen/underscore robustness. + +- `_utils.py` re-exports `require` (`from autointent._deps import require`) so existing + `from autointent._utils import require` import lines stay untouched. + +- **Caching:** memoize the pure resolution step (extra → tuple of leaf requirements) with + `functools.lru_cache` keyed by `(dist, extra)`, so hot module-init paths do not re-walk metadata. + Version checks run on each call (cheap). + +### The #322 mechanism, concretely + +`transformers[torch]>=4.49.0,<5.0.0` parses to `name="transformers"`, `extras={"torch"}`, plus the +specifier. `_resolve` checks the `transformers` version against the specifier, then recurses via +`_iter_extra_reqs("transformers", "torch")` → `accelerate>=0.26.0`, `torch>=2.2`. A missing `accelerate` +surfaces here, named explicitly, with the `autointent[transformers]` install hint. + +### Error shape (aggregated) + +``` +ImportError: Feature requires extra 'transformers', but dependencies are missing or outdated: + - accelerate>=0.26.0 (not installed) + - transformers>=4.49.0,<5.0.0 (installed: 4.30.0) +Install with: pip install 'autointent[transformers]' +``` + +### pyproject change + +Add `packaging` to **core** `dependencies` (it is currently present only transitively — promoting it to +a declared dependency avoids repeating the exact transitive-reliance mistake behind #322). Conservative +floor, e.g. `packaging (>=23.2)`; the `Requirement` / `SpecifierSet` / `canonicalize_name` / +`Marker.evaluate` APIs used here have been stable for years. The exact floor will be confirmed during +implementation. + +### Call-site migration + +All ~20 sites collapse to passing just the extra name. Each is mapped precisely during implementation, +preserving current intent. Representative cases: + +| File | Before | After | +| --- | --- | --- | +| `modules/scoring/_bert.py` | `require("transformers", "transformers")` | `require("transformers")` | +| `modules/scoring/_ptuning/ptuning.py`, `_lora/lora.py` | `require("peft", extra="peft")` | `require("peft")` | +| `modules/scoring/_catboost/catboost_scorer.py` | `require("catboost", extra="catboost")` | `require("catboost")` | +| `_wrappers/embedder/vllm.py` | `require("vllm", extra="vllm")` | `require("vllm")` | +| `_wrappers/embedder/openai.py` | `require("tiktoken","openai")` + `require("openai","openai")` | `require("openai")` | +| `generation/_generator.py` | `require("openai","openai")` | `require("openai")` | +| `_wrappers/ranker.py` | `require("sentence_transformers", extra="sentence-transformers")` | `require("sentence-transformers")` | +| `_wrappers/embedder/sentence_transformers.py` | `require("transformers")` + `require("accelerate")` + `require("sentence_transformers")` | collapse (accelerate covered by recursion) | +| `_dump_tools/unit_dumpers.py` | peft / transformers / catboost requires | `require("peft")` / `require("transformers")` / `require("catboost")` | + +### Testing (TDD, synthetic metadata) + +Unit tests inject a fake requirement graph (monkeypatch `_iter_extra_reqs` or the underlying +`importlib.metadata.requires` / `version`) so assertions do not depend on what CI happens to install: + +- all dependencies present and version-satisfied → no raise +- a leaf dependency missing → `ImportError` naming the dist and the extra + install hint +- installed version too old → `ImportError` showing installed vs required +- **nested-extra dependency missing (the `accelerate` case) → `ImportError` names `accelerate`** (proves #322) +- cyclic extras → resolution terminates +- unknown extra name → clear error + +Plus one real-metadata smoke test against a known-present extra to catch wiring regressions. + +Per project convention, tests are verified via CI (push branch + coverage dispatch), not run locally. + +## Out of scope + +- Changing what each module actually imports at runtime. +- Resolving full transitive *non-extra* dependency trees — we only walk declared extras, which is where + the relevant guards belong. +- Source checkouts with no installed metadata (dev installs always have `.dist-info`/PEP 660 metadata). + +## Rejected alternatives + +- **Import-based presence check** — slower, drags in heavy imports (torch), and needs an + import-name ↔ dist-name map. +- **One-level resolution (autointent extras only)** — would not catch `accelerate`, since it is nested + inside `transformers`' `torch` extra; #322 would still need a manual guard. +- **Hand-maintained extra → deps map** — still a sync burden, just centralized. From 33c16c640ecc35e29ce2cc762d141e665109a7e9 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 22 Jun 2026 23:20:34 +0300 Subject: [PATCH 02/16] docs: implementation plan for metadata-driven require(extra) (#322) Co-Authored-By: Claude Opus 4.8 --- .../2026-06-22-require-extra-resolver.md | 685 ++++++++++++++++++ 1 file changed, 685 insertions(+) create mode 100644 docs/superpowers/plans/2026-06-22-require-extra-resolver.md diff --git a/docs/superpowers/plans/2026-06-22-require-extra-resolver.md b/docs/superpowers/plans/2026-06-22-require-extra-resolver.md new file mode 100644 index 000000000..797617bb2 --- /dev/null +++ b/docs/superpowers/plans/2026-06-22-require-extra-resolver.md @@ -0,0 +1,685 @@ +# Metadata-driven `require(extra)` Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the import-based `require(dependency, extra)` guard with `require(extra)`, which reads installed package metadata to verify every dependency of an autointent extra — recursively, including nested extras like `transformers[torch] → accelerate` — is installed and version-satisfied. + +**Architecture:** A new focused module `src/autointent/_deps.py` resolves an extra's requirement graph via `importlib.metadata` (the build bakes `pyproject.toml`'s requirements into the wheel as metadata; the source file is not shipped) and validates each leaf with `packaging`. `require` moves there; all ~20 call sites pass just the extra name. Fixes #322 structurally — the missing `accelerate` is caught by recursing into `transformers`' own `torch` extra, with no hand-added guard. + +**Tech Stack:** Python 3.10+, `importlib.metadata` (stdlib), `packaging` (promoted to a core dependency), pytest with `monkeypatch`. + +## Global Constraints + +- **Lint:** `ruff` runs with `select = ["ALL"]`, line-length 120, target `py310`, **google docstring convention**. Every module/function needs a google-style docstring; every parameter and return needs a type annotation. Raise exceptions via a pre-assigned `msg` variable (satisfies `EM`/`TRY003`). +- **Types:** `mypy --strict` on **python 3.10**. `from __future__ import annotations` at the top of every new `.py`. When building a typed `set[str]`/`dict` from `packaging.utils.canonicalize_name` (which returns `NormalizedName`), wrap in `str(...)` — `set` is invariant so `set[NormalizedName]` is not assignable to `set[str]`. +- **New core dependency:** add `packaging (>=23.2)` to `[project].dependencies` (no upper cap — avoids resolver conflicts; it is currently present only transitively, the same mistake #322 is about). +- **Testing:** Do **NOT** run the full pytest suite locally — it can freeze the machine. The new `tests/test_deps.py` is pure Python (monkeypatched metadata, no model loading) and is the **only** suite you may run locally, via `pytest tests/test_deps.py -q`. Full-suite + call-site migration verification goes through **CI** (push branch + coverage dispatch). `ruff` and `mypy` may be run locally. +- **Do not** commit `uv.lock` (gitignored by design; CI re-solves deps). +- **Extra names are passed verbatim as declared in `pyproject.toml`** (e.g. `"sentence-transformers"` with a hyphen). + +--- + +### Task 1: Create `_deps.py` with `_check` + add `packaging` core dep + +**Files:** +- Modify: `pyproject.toml` (add `packaging` to `[project].dependencies`) +- Create: `src/autointent/_deps.py` +- Create: `tests/test_deps.py` + +**Interfaces:** +- Produces: `_check(req: packaging.requirements.Requirement) -> str | None` — returns a one-line problem description if the dist named by `req` is not installed or its installed version is outside `req.specifier`, else `None`. + +- [ ] **Step 1: Add `packaging` to core dependencies** + +In `pyproject.toml`, inside `[project]` `dependencies = [ ... ]`, add one line (keep existing entries): + +```toml + "packaging (>=23.2)", +``` + +- [ ] **Step 2: Create the module with its docstring, imports, and `_check`** + +Create `src/autointent/_deps.py`: + +```python +"""Validate optional-extra dependencies from installed package metadata. + +The :func:`require` guard checks that every dependency of an ``autointent`` extra +is installed and version-satisfied. It reads the metadata that the build baked into +the installed distribution (via :mod:`importlib.metadata`) rather than the source +``pyproject.toml``, which is not shipped in the wheel. Nested extras are resolved +recursively, so e.g. the ``transformers`` extra (``transformers[torch]``) +transitively requires ``accelerate`` and that is checked too. +""" + +from __future__ import annotations + +from importlib import metadata + +from packaging.requirements import Requirement + +_DIST = "autointent" + + +def _check(req: Requirement) -> str | None: + """Check a single requirement against the installed environment. + + Args: + req: The parsed requirement to validate. + + Returns: + A human-readable problem description if the distribution is missing or its + installed version does not satisfy ``req.specifier``; ``None`` otherwise. + """ + try: + installed = metadata.version(req.name) + except metadata.PackageNotFoundError: + return f"{req.name}{req.specifier} (not installed)" + if req.specifier and not req.specifier.contains(installed, prereleases=True): + return f"{req.name}{req.specifier} (installed: {installed})" + return None +``` + +- [ ] **Step 3: Write the failing test and shared metadata-faking helper** + +Create `tests/test_deps.py`: + +```python +import re +from importlib import metadata + +import pytest + +import autointent._deps as deps +from packaging.requirements import Requirement + +_EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""") + + +class _FakeMeta: + def __init__(self, extras): + self._extras = extras + + def get_all(self, name, failobj=None): + if name == "Provides-Extra": + return list(self._extras) + return failobj + + +def _patch_metadata(monkeypatch, requires_map, versions): + """Patch importlib.metadata so deps.* sees a synthetic dependency graph. + + requires_map: {dist_name: [PEP 508 requirement string, ...]} + versions: {dist_name: installed_version_string} (absent key => not installed) + """ + def fake_requires(dist): + return requires_map.get(dist, []) + + def fake_version(name): + if name not in versions: + raise metadata.PackageNotFoundError(name) + return versions[name] + + def fake_metadata(dist): + extras = sorted({e for s in requires_map.get(dist, []) for e in _EXTRA_RE.findall(s)}) + return _FakeMeta(extras) + + monkeypatch.setattr(deps.metadata, "requires", fake_requires) + monkeypatch.setattr(deps.metadata, "version", fake_version) + monkeypatch.setattr(deps.metadata, "metadata", fake_metadata) + + +def test_check_returns_none_when_satisfied(monkeypatch): + _patch_metadata(monkeypatch, {}, {"catboost": "1.5.0"}) + assert deps._check(Requirement("catboost>=1.2.8,<2.0.0")) is None + + +def test_check_reports_missing(monkeypatch): + _patch_metadata(monkeypatch, {}, {}) + problem = deps._check(Requirement("catboost>=1.2.8")) + assert problem is not None + assert "catboost" in problem + assert "not installed" in problem + + +def test_check_reports_outdated(monkeypatch): + _patch_metadata(monkeypatch, {}, {"catboost": "1.0.0"}) + problem = deps._check(Requirement("catboost>=1.2.8,<2.0.0")) + assert problem is not None + assert "installed: 1.0.0" in problem +``` + +- [ ] **Step 4: Run the tests** + +Run: `pytest tests/test_deps.py -q` +Expected: 3 passed. + +- [ ] **Step 5: Lint and type-check** + +Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add pyproject.toml src/autointent/_deps.py tests/test_deps.py +git commit -m "feat(deps): add _check leaf version validator + packaging core dep (#322)" +``` + +--- + +### Task 2: `_iter_extra_reqs` — read a dist's requirements for one extra + +**Files:** +- Modify: `src/autointent/_deps.py` +- Test: `tests/test_deps.py` + +**Interfaces:** +- Consumes: nothing new. +- Produces: `_iter_extra_reqs(dist: str, extra: str) -> list[Requirement]` — the requirements of `dist` activated by `extra` for the current environment. Base dependencies (no extra marker, or only an environment marker) are excluded. + +- [ ] **Step 1: Write the failing test** + +Add to `tests/test_deps.py`: + +```python +def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": [ + "numpy>=1.0 ; python_version >= '3.0'", # base dep w/ env marker -> excluded + "torch>=2.0", # base dep, no marker -> excluded + "catboost>=1.2.8,<2.0.0 ; extra == 'catboost'", # extra member -> included + "peft>=0.10.0 ; extra == 'peft'", # different extra -> excluded + ]}, + {}, + ) + reqs = deps._iter_extra_reqs("autointent", "catboost") + assert {r.name for r in reqs} == {"catboost"} +``` + +- [ ] **Step 2: Run it to confirm it fails** + +Run: `pytest tests/test_deps.py::test_iter_extra_reqs_selects_only_extra_members -q` +Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute '_iter_extra_reqs'`. + +- [ ] **Step 3: Implement `_iter_extra_reqs`** + +In `src/autointent/_deps.py`, add `from packaging.utils import canonicalize_name` to the imports (keep the import block sorted: `Requirement` then `canonicalize_name`), and append: + +```python +def _iter_extra_reqs(dist: str, extra: str) -> list[Requirement]: + """Return the requirements of ``dist`` activated by ``extra``. + + A requirement is included only when its marker is satisfied *because* of the + extra: it must evaluate true with the extra set and false with no extra. This + excludes base dependencies that merely carry an environment marker. + + Args: + dist: Distribution name whose metadata is read. + extra: Extra name whose dependencies are wanted. + + Returns: + The parsed requirements activated by ``extra`` in the current environment. + """ + target = str(canonicalize_name(extra)) + result: list[Requirement] = [] + for spec in metadata.requires(dist) or []: + req = Requirement(spec) + marker = req.marker + if marker is None: + continue + if marker.evaluate({"extra": target}) and not marker.evaluate({"extra": ""}): + result.append(req) + return result +``` + +- [ ] **Step 4: Run the tests** + +Run: `pytest tests/test_deps.py -q` +Expected: all passed. + +- [ ] **Step 5: Lint and type-check** + +Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add src/autointent/_deps.py tests/test_deps.py +git commit -m "feat(deps): resolve a dist's requirements for a single extra" +``` + +--- + +### Task 3: `_resolve` — recursive walk with cycle guard + cached entry point + +**Files:** +- Modify: `src/autointent/_deps.py` +- Test: `tests/test_deps.py` + +**Interfaces:** +- Consumes: `_iter_extra_reqs`. +- Produces: + - `_resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]` — every leaf requirement reachable from `dist[extra]`, recursing through nested extras, cycle-guarded by `seen`. + - `_resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]` — memoized wrapper (call this from `require`). + +- [ ] **Step 1: Write the failing tests + cache-clear fixture** + +Add to `tests/test_deps.py` (the autouse fixture keeps the lru_cache from leaking synthetic graphs across tests): + +```python +@pytest.fixture(autouse=True) +def _clear_resolve_cache(): + deps._resolve_cached.cache_clear() + yield + deps._resolve_cached.cache_clear() + + +def test_resolve_recurses_into_nested_extra(monkeypatch): + _patch_metadata( + monkeypatch, + { + "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], + "transformers": [ + "torch>=2.2 ; extra == 'torch'", + "accelerate>=0.26.0 ; extra == 'torch'", + ], + }, + {}, + ) + reqs = deps._resolve("autointent", "transformers", set()) + assert {r.name for r in reqs} == {"transformers", "torch", "accelerate"} + + +def test_resolve_terminates_on_cycle(monkeypatch): + _patch_metadata( + monkeypatch, + {"pkg": [ + "pkg[b]>=1.0 ; extra == 'a'", + "pkg[a]>=1.0 ; extra == 'b'", + ]}, + {}, + ) + reqs = deps._resolve("pkg", "a", set()) + assert {r.name for r in reqs} == {"pkg"} + + +def test_resolve_cached_returns_tuple(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, + {}, + ) + result = deps._resolve_cached("autointent", "catboost") + assert isinstance(result, tuple) + assert {r.name for r in result} == {"catboost"} +``` + +- [ ] **Step 2: Run them to confirm they fail** + +Run: `pytest tests/test_deps.py -k resolve -q` +Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute '_resolve'`. + +- [ ] **Step 3: Implement `_resolve` and `_resolve_cached`** + +In `src/autointent/_deps.py`, add `from functools import lru_cache` to the imports (above `from importlib import metadata`), and append: + +```python +def _resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]: + """Recursively collect every leaf requirement activated by ``dist[extra]``. + + Each activated requirement is returned for version checking, and any nested + extras it declares (e.g. ``transformers[torch]``) are resolved in turn. + + Args: + dist: Distribution name to start from. + extra: Extra name to resolve. + seen: Visited ``(dist, extra)`` pairs, used to break dependency cycles. + + Returns: + The flattened list of requirements to validate. + """ + key = (str(canonicalize_name(dist)), str(canonicalize_name(extra))) + if key in seen: + return [] + seen.add(key) + + leaves: list[Requirement] = [] + for req in _iter_extra_reqs(dist, extra): + leaves.append(req) + for nested in req.extras: + leaves.extend(_resolve(req.name, nested, seen)) + return leaves + + +@lru_cache(maxsize=None) +def _resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]: + """Memoized :func:`_resolve`; the metadata graph shape is stable per process. + + Args: + dist: Distribution name to start from. + extra: Extra name to resolve. + + Returns: + The resolved requirements as an immutable tuple. + """ + return tuple(_resolve(dist, extra, set())) +``` + +- [ ] **Step 4: Run the tests** + +Run: `pytest tests/test_deps.py -q` +Expected: all passed. + +- [ ] **Step 5: Lint and type-check** + +Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add src/autointent/_deps.py tests/test_deps.py +git commit -m "feat(deps): recursive extra resolution with cycle guard + cache" +``` + +--- + +### Task 4: `require` — public guard (unknown-extra check, aggregation, message) + +**Files:** +- Modify: `src/autointent/_deps.py` +- Test: `tests/test_deps.py` + +**Interfaces:** +- Consumes: `_resolve_cached`, `_check`. +- Produces: `require(extra: str, *, dist: str = "autointent") -> None`. Raises `ValueError` if `dist` declares no such extra (typo guard); raises `ImportError` listing every missing/outdated dependency with a `pip install '[]'` hint; otherwise returns `None`. This is the symbol every call site imports. + +- [ ] **Step 1: Write the failing tests** + +Add to `tests/test_deps.py`: + +```python +def test_require_passes_when_all_present(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {"catboost": "1.5.0"}, + ) + deps.require("catboost") # must not raise + + +def test_require_raises_for_missing_leaf(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {}, + ) + with pytest.raises(ImportError) as exc: + deps.require("catboost") + text = str(exc.value) + assert "catboost" in text + assert "not installed" in text + assert "pip install 'autointent[catboost]'" in text + + +def test_require_raises_for_outdated_version(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {"catboost": "1.0.0"}, + ) + with pytest.raises(ImportError) as exc: + deps.require("catboost") + assert "installed: 1.0.0" in str(exc.value) + + +def test_require_detects_missing_nested_accelerate(monkeypatch): + # Regression for #322: accelerate lives in transformers' own [torch] extra, + # so a transformers-present-but-accelerate-absent env must still be flagged. + _patch_metadata( + monkeypatch, + { + "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], + "transformers": [ + "torch>=2.2 ; extra == 'torch'", + "accelerate>=0.26.0 ; extra == 'torch'", + ], + }, + {"transformers": "4.49.0", "torch": "2.2.0"}, # accelerate absent + ) + with pytest.raises(ImportError) as exc: + deps.require("transformers") + assert "accelerate" in str(exc.value) + + +def test_require_rejects_unknown_extra(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, + {"catboost": "1.5.0"}, + ) + with pytest.raises(ValueError, match="no extra 'transfomers'"): + deps.require("transfomers") # typo +``` + +- [ ] **Step 2: Run them to confirm they fail** + +Run: `pytest tests/test_deps.py -k require -q` +Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute 'require'`. + +- [ ] **Step 3: Implement `_provides_extras` and `require`** + +In `src/autointent/_deps.py`, append: + +```python +def _provides_extras(dist: str) -> set[str]: + """Return the normalized set of extras declared by ``dist``. + + Args: + dist: Distribution name whose metadata is read. + + Returns: + Normalized extra names from the distribution's ``Provides-Extra`` metadata. + """ + md = metadata.metadata(dist) + return {str(canonicalize_name(e)) for e in (md.get_all("Provides-Extra") or [])} + + +def require(extra: str, *, dist: str = _DIST) -> None: + """Ensure every dependency of an ``autointent`` extra is installed and current. + + Args: + extra: The extra to validate, e.g. ``"transformers"``. + dist: Distribution that declares the extra. Defaults to ``"autointent"``. + + Raises: + ValueError: If ``dist`` declares no such ``extra`` (typically a typo). + ImportError: If any required dependency is missing or its installed version + does not satisfy the constraint declared in the metadata. + """ + known = _provides_extras(dist) + if str(canonicalize_name(extra)) not in known: + msg = f"'{dist}' declares no extra '{extra}'. Known extras: {', '.join(sorted(known))}." + raise ValueError(msg) + + problems: list[str] = [] + for req in _resolve_cached(dist, extra): + problem = _check(req) + if problem is not None and problem not in problems: + problems.append(problem) + + if problems: + bullets = "\n".join(f" - {p}" for p in problems) + msg = ( + f"Feature requires extra '{extra}', but dependencies are missing or outdated:\n" + f"{bullets}\n" + f"Install with: pip install '{dist}[{extra}]'" + ) + raise ImportError(msg) +``` + +- [ ] **Step 4: Run the full module test file** + +Run: `pytest tests/test_deps.py -q` +Expected: all passed. + +- [ ] **Step 5: Lint and type-check** + +Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add src/autointent/_deps.py tests/test_deps.py +git commit -m "feat(deps): add require(extra) guard with aggregated, versioned errors (#322)" +``` + +--- + +### Task 5: Migrate all call sites + remove old `require` from `_utils.py` + +**Files:** +- Modify: `src/autointent/_utils.py` (remove old `require` + now-unused `import importlib`) +- Modify (import line `from autointent._utils import require` → `from autointent._deps import require`, and each `require(...)` call): + - `src/autointent/modules/scoring/_bert.py` + - `src/autointent/modules/scoring/_ptuning/ptuning.py` + - `src/autointent/modules/scoring/_lora/lora.py` + - `src/autointent/modules/scoring/_catboost/catboost_scorer.py` + - `src/autointent/_wrappers/embedder/vllm.py` + - `src/autointent/_wrappers/embedder/openai.py` + - `src/autointent/_wrappers/embedder/sentence_transformers.py` + - `src/autointent/_wrappers/ranker.py` + - `src/autointent/generation/_generator.py` + - `src/autointent/_dump_tools/unit_dumpers.py` + +**Interfaces:** +- Consumes: `require(extra: str)` from `autointent._deps` (Task 4). +- Produces: no new symbols. After this task no module imports `require` from `autointent._utils`, and no `require(` call passes more than one argument. + +- [ ] **Step 1: Remove the old `require` and its import from `_utils.py`** + +In `src/autointent/_utils.py`: delete the entire `require` function (the `def require(...)` block and its body) and delete the top-level `import importlib` line (it was only used by `require`). Leave `import torch`, `from typing import TypeVar`, `_funcs_to_dict`, and `detect_device` untouched. + +- [ ] **Step 2: Update every import line** + +In each of the 10 files listed above, change: + +```python +from autointent._utils import require +``` + +to: + +```python +from autointent._deps import require +``` + +- [ ] **Step 3: Update the call sites (one argument = the extra name)** + +Apply these exact replacements: + +- `src/autointent/modules/scoring/_bert.py`: `require("transformers", "transformers")` → `require("transformers")` +- `src/autointent/modules/scoring/_ptuning/ptuning.py`: `require("peft", extra="peft")` → `require("peft")` +- `src/autointent/modules/scoring/_lora/lora.py`: `require("peft", extra="peft")` → `require("peft")` +- `src/autointent/modules/scoring/_catboost/catboost_scorer.py`: `require("catboost", extra="catboost")` → `require("catboost")` +- `src/autointent/_wrappers/embedder/vllm.py`: `require("vllm", extra="vllm")` → `require("vllm")` +- `src/autointent/_wrappers/embedder/openai.py`: `require("tiktoken", "openai")` → `require("openai")` **and** `require("openai", "openai")` → `require("openai")` +- `src/autointent/generation/_generator.py`: `require("openai", "openai")` → `require("openai")` +- `src/autointent/_wrappers/ranker.py`: `require("sentence_transformers", extra="sentence-transformers")` → `require("sentence-transformers")` +- `src/autointent/_wrappers/embedder/sentence_transformers.py`: + - line ~45: `require("transformers", extra="transformers")` → `require("transformers")` + - line ~133: `require("sentence_transformers", extra="sentence-transformers")` → `require("sentence-transformers")` + - the training block (three consecutive calls): + ```python + require("sentence_transformers", extra="sentence-transformers") + require("transformers", extra="transformers") + require("accelerate", extra="transformers") + ``` + becomes (drop the `accelerate` line — it is now covered by recursing into the `transformers` extra): + ```python + require("sentence-transformers") + require("transformers") + ``` +- `src/autointent/_dump_tools/unit_dumpers.py` — replace all occurrences: + - `require("peft", extra="peft")` → `require("peft")` + - `require("transformers", extra="transformers")` → `require("transformers")` + - `require("catboost", extra="catboost")` → `require("catboost")` + +- [ ] **Step 4: Verify no stale call shape or import remains** + +Run: +```bash +grep -rn 'from autointent._utils import require' src/autointent --include='*.py'; \ +grep -rnE 'require\([^)]*,' src/autointent --include='*.py' | grep -v 'def require' +``` +Expected: **no output** from either grep (no old import; no `require(` call with a second argument). + +- [ ] **Step 5: Lint and type-check the whole package** + +Run: `ruff check src/autointent && mypy src/autointent` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add src/autointent +git commit -m "refactor(deps): migrate all require() call sites to extra-name guard (#322)" +``` + +--- + +### Task 6: Real-metadata smoke test + push to CI + +**Files:** +- Modify: `tests/test_deps.py` (add one un-monkeypatched wiring test) + +**Interfaces:** +- Consumes: `_resolve_cached` against the real installed `autointent` metadata. +- Produces: no new symbols. + +- [ ] **Step 1: Add a real-metadata wiring test** + +Add to `tests/test_deps.py` (no `monkeypatch` — exercises the real `importlib.metadata` reads to catch wiring regressions; asserts only resolution shape, not install state, so it is deterministic regardless of which extras CI installs): + +```python +def test_resolve_reads_real_autointent_metadata(): + reqs = deps._resolve_cached("autointent", "catboost") + assert any(r.name == "catboost" for r in reqs) +``` + +- [ ] **Step 2: Run the isolated test file** + +Run: `pytest tests/test_deps.py -q` +Expected: all passed. + +- [ ] **Step 3: Lint and type-check** + +Run: `ruff check src/autointent tests/test_deps.py && mypy src/autointent` +Expected: no errors. + +- [ ] **Step 4: Commit** + +```bash +git add tests/test_deps.py +git commit -m "test(deps): smoke-test require resolution against real metadata" +``` + +- [ ] **Step 5: Push the branch and trigger CI** + +Push the working branch and open a PR (or trigger the coverage dispatch) so the **full** suite — including the migrated call-site modules — runs in CI. Do not run the full suite locally. Confirm CI is green before considering the work done; link the run in the PR referencing #322. + +--- + +## Notes / deviations from the spec + +- **No re-export from `_utils`.** The spec proposed re-exporting `require` from `autointent._utils` to keep import lines untouched. Under `ruff select = ["ALL"]` a bare re-export trips `F401`, and the redundant-alias workaround trips `PLC0414`. Since every call-site file is edited for the signature change anyway, updating its import line to `from autointent._deps import require` is cleaner and lint-clean. The dependency logic now has a single home (`_deps.py`). + +## Self-review + +- **Spec coverage:** new `_deps.py` (Tasks 1–4); metadata-not-pyproject premise (module docstring, Task 1); metadata version check (`_check`, Task 1); recursive nested-extra resolution incl. accelerate/#322 (Task 3 + Task 4 regression test); aggregated actionable error (Task 4); `packaging` promoted to core dep (Task 1); all ~20 call sites migrated + clean replace (Task 5); synthetic-metadata TDD for every scenario incl. cycle/unknown-extra (Tasks 1–4); real-metadata smoke test (Task 6); CI verification per project convention (Task 6, Global Constraints). The one spec deviation (re-export) is documented above. +- **Placeholder scan:** none — every code/test step contains complete code; every command has expected output. +- **Type consistency:** `_check`, `_iter_extra_reqs`, `_resolve`, `_resolve_cached`, `_provides_extras`, `require` keep identical signatures across the tasks that define and call them; `_resolve_cached` (not `_resolve`) is the symbol `require` consumes, matching Task 3's "Produces" and Task 4's "Consumes". From 08b267e02058457a0f28e870c2440d442f380d9f Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:03:46 +0300 Subject: [PATCH 03/16] docs: address plan review (UP033, import order, stale doc ref) (#322) Co-Authored-By: Claude Opus 4.8 --- .../plans/2026-06-22-require-extra-resolver.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/superpowers/plans/2026-06-22-require-extra-resolver.md b/docs/superpowers/plans/2026-06-22-require-extra-resolver.md index 797617bb2..a47be0a27 100644 --- a/docs/superpowers/plans/2026-06-22-require-extra-resolver.md +++ b/docs/superpowers/plans/2026-06-22-require-extra-resolver.md @@ -88,10 +88,9 @@ Create `tests/test_deps.py`: import re from importlib import metadata -import pytest +from packaging.requirements import Requirement import autointent._deps as deps -from packaging.requirements import Requirement _EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""") @@ -267,7 +266,7 @@ git commit -m "feat(deps): resolve a dist's requirements for a single extra" - [ ] **Step 1: Write the failing tests + cache-clear fixture** -Add to `tests/test_deps.py` (the autouse fixture keeps the lru_cache from leaking synthetic graphs across tests): +First add `import pytest` to the **third-party** import group of `tests/test_deps.py` — it must come *before* `from packaging.requirements import Requirement` (ruff's isort sorts straight `import` statements before `from` imports within a section). Then add the fixture and tests (the autouse fixture keeps the cache from leaking synthetic graphs across tests): ```python @pytest.fixture(autouse=True) @@ -324,7 +323,7 @@ Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute - [ ] **Step 3: Implement `_resolve` and `_resolve_cached`** -In `src/autointent/_deps.py`, add `from functools import lru_cache` to the imports (above `from importlib import metadata`), and append: +In `src/autointent/_deps.py`, add `from functools import cache` to the imports (above `from importlib import metadata`), and append (use `@cache`, not `@lru_cache(maxsize=None)` — ruff `ALL` raises `UP033` for the latter; `functools.cache` exists on py3.10 and still exposes `.cache_clear()`): ```python def _resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]: @@ -354,7 +353,7 @@ def _resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirem return leaves -@lru_cache(maxsize=None) +@cache def _resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]: """Memoized :func:`_resolve`; the metadata graph shape is stable per process. @@ -544,6 +543,7 @@ git commit -m "feat(deps): add require(extra) guard with aggregated, versioned e **Files:** - Modify: `src/autointent/_utils.py` (remove old `require` + now-unused `import importlib`) +- Modify: `user_guides/advanced/02_embedder_configuration.py` (stale prose reference to `autointent._utils.require`) - Modify (import line `from autointent._utils import require` → `from autointent._deps import require`, and each `require(...)` call): - `src/autointent/modules/scoring/_bert.py` - `src/autointent/modules/scoring/_ptuning/ptuning.py` @@ -609,6 +609,8 @@ Apply these exact replacements: - `require("transformers", extra="transformers")` → `require("transformers")` - `require("catboost", extra="catboost")` → `require("catboost")` +Then fix the stale prose reference in `user_guides/advanced/02_embedder_configuration.py` (line ~22): change `autointent._utils.require` to `autointent._deps.require` (it is documentation prose, not an import — no lint impact, just keeping the docs accurate). + - [ ] **Step 4: Verify no stale call shape or import remains** Run: @@ -677,6 +679,8 @@ Push the working branch and open a PR (or trigger the coverage dispatch) so the ## Notes / deviations from the spec - **No re-export from `_utils`.** The spec proposed re-exporting `require` from `autointent._utils` to keep import lines untouched. Under `ruff select = ["ALL"]` a bare re-export trips `F401`, and the redundant-alias workaround trips `PLC0414`. Since every call-site file is edited for the signature change anyway, updating its import line to `from autointent._deps import require` is cleaner and lint-clean. The dependency logic now has a single home (`_deps.py`). +- **Cycle-guard key type.** The spec sketched `seen: set[tuple[str, frozenset[str]]]`. The plan uses `set[tuple[str, str]]` because `_resolve` recurses one nested-extra name at a time (it iterates `req.extras` and recurses per name), so the key is `(dist, single-extra-name)`. Behaviourally equivalent for cycle-breaking; simpler. +- **Error-bullet ordering.** Problems are listed in resolution order (e.g. `transformers` before its nested `accelerate`), which is the reverse of the illustrative example in the spec. Cosmetic — no test asserts bullet order. ## Self-review From bba903db901fb92f2377d83f20114dc159aae868 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:22:57 +0300 Subject: [PATCH 04/16] feat(deps): add _check leaf version validator + packaging core dep (#322) --- pyproject.toml | 1 + src/autointent/_deps.py | 38 +++++++++++++++++++++++++ tests/test_deps.py | 61 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/autointent/_deps.py create mode 100644 tests/test_deps.py diff --git a/pyproject.toml b/pyproject.toml index 5f65407ae..5fbabaf86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "aiometer (>=1.0.0,<2.0.0)", "aiofiles (>=24.1.0,<25.0.0)", "threadpoolctl (>=3.0.0,<4.0.0)", + "packaging (>=23.2)", ] [project.optional-dependencies] diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py new file mode 100644 index 000000000..12c8dc2bd --- /dev/null +++ b/src/autointent/_deps.py @@ -0,0 +1,38 @@ +"""Validate optional-extra dependencies from installed package metadata. + +The :func:`require` guard checks that every dependency of an ``autointent`` extra +is installed and version-satisfied. It reads the metadata that the build baked into +the installed distribution (via :mod:`importlib.metadata`) rather than the source +``pyproject.toml``, which is not shipped in the wheel. Nested extras are resolved +recursively, so e.g. the ``transformers`` extra (``transformers[torch]``) +transitively requires ``accelerate`` and that is checked too. +""" + +from __future__ import annotations + +from importlib import metadata +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from packaging.requirements import Requirement + +_DIST = "autointent" + + +def _check(req: Requirement) -> str | None: + """Check a single requirement against the installed environment. + + Args: + req: The parsed requirement to validate. + + Returns: + A human-readable problem description if the distribution is missing or its + installed version does not satisfy ``req.specifier``; ``None`` otherwise. + """ + try: + installed = metadata.version(req.name) + except metadata.PackageNotFoundError: + return f"{req.name}{req.specifier} (not installed)" + if req.specifier and not req.specifier.contains(installed, prereleases=True): + return f"{req.name}{req.specifier} (installed: {installed})" + return None diff --git a/tests/test_deps.py b/tests/test_deps.py new file mode 100644 index 000000000..a55bada7e --- /dev/null +++ b/tests/test_deps.py @@ -0,0 +1,61 @@ +import re +from importlib import metadata + +from packaging.requirements import Requirement + +import autointent._deps as deps + +_EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""") + + +class _FakeMeta: + def __init__(self, extras): + self._extras = extras + + def get_all(self, name, failobj=None): + if name == "Provides-Extra": + return list(self._extras) + return failobj + + +def _patch_metadata(monkeypatch, requires_map, versions): + """Patch importlib.metadata so deps.* sees a synthetic dependency graph. + + requires_map: {dist_name: [PEP 508 requirement string, ...]} + versions: {dist_name: installed_version_string} (absent key => not installed) + """ + def fake_requires(dist): + return requires_map.get(dist, []) + + def fake_version(name): + if name not in versions: + raise metadata.PackageNotFoundError(name) + return versions[name] + + def fake_metadata(dist): + extras = sorted({e for s in requires_map.get(dist, []) for e in _EXTRA_RE.findall(s)}) + return _FakeMeta(extras) + + monkeypatch.setattr(deps.metadata, "requires", fake_requires) + monkeypatch.setattr(deps.metadata, "version", fake_version) + monkeypatch.setattr(deps.metadata, "metadata", fake_metadata) + + +def test_check_returns_none_when_satisfied(monkeypatch): + _patch_metadata(monkeypatch, {}, {"catboost": "1.5.0"}) + assert deps._check(Requirement("catboost>=1.2.8,<2.0.0")) is None + + +def test_check_reports_missing(monkeypatch): + _patch_metadata(monkeypatch, {}, {}) + problem = deps._check(Requirement("catboost>=1.2.8")) + assert problem is not None + assert "catboost" in problem + assert "not installed" in problem + + +def test_check_reports_outdated(monkeypatch): + _patch_metadata(monkeypatch, {}, {"catboost": "1.0.0"}) + problem = deps._check(Requirement("catboost>=1.2.8,<2.0.0")) + assert problem is not None + assert "installed: 1.0.0" in problem From c0567bd644866813038f139cb9c06e151dfa691d Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:27:01 +0300 Subject: [PATCH 05/16] feat(deps): resolve a dist's requirements for a single extra --- src/autointent/_deps.py | 31 ++++++++++++++++++++++++++++--- tests/test_deps.py | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index 12c8dc2bd..dcb1cc199 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -11,10 +11,9 @@ from __future__ import annotations from importlib import metadata -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from packaging.requirements import Requirement +from packaging.requirements import Requirement +from packaging.utils import canonicalize_name _DIST = "autointent" @@ -36,3 +35,29 @@ def _check(req: Requirement) -> str | None: if req.specifier and not req.specifier.contains(installed, prereleases=True): return f"{req.name}{req.specifier} (installed: {installed})" return None + + +def _iter_extra_reqs(dist: str, extra: str) -> list[Requirement]: + """Return the requirements of ``dist`` activated by ``extra``. + + A requirement is included only when its marker is satisfied *because* of the + extra: it must evaluate true with the extra set and false with no extra. This + excludes base dependencies that merely carry an environment marker. + + Args: + dist: Distribution name whose metadata is read. + extra: Extra name whose dependencies are wanted. + + Returns: + The parsed requirements activated by ``extra`` in the current environment. + """ + target = str(canonicalize_name(extra)) + result: list[Requirement] = [] + for spec in metadata.requires(dist) or []: + req = Requirement(spec) + marker = req.marker + if marker is None: + continue + if marker.evaluate({"extra": target}) and not marker.evaluate({"extra": ""}): + result.append(req) + return result diff --git a/tests/test_deps.py b/tests/test_deps.py index a55bada7e..62cab7bc2 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -59,3 +59,18 @@ def test_check_reports_outdated(monkeypatch): problem = deps._check(Requirement("catboost>=1.2.8,<2.0.0")) assert problem is not None assert "installed: 1.0.0" in problem + + +def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": [ + "numpy>=1.0 ; python_version >= '3.0'", # base dep w/ env marker -> excluded + "torch>=2.0", # base dep, no marker -> excluded + "catboost>=1.2.8,<2.0.0 ; extra == 'catboost'", # extra member -> included + "peft>=0.10.0 ; extra == 'peft'", # different extra -> excluded + ]}, + {}, + ) + reqs = deps._iter_extra_reqs("autointent", "catboost") + assert {r.name for r in reqs} == {"catboost"} From bb743117a55db8d7fd294df7c69e069ff3e2caea Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:30:05 +0300 Subject: [PATCH 06/16] feat(deps): recursive extra resolution with cycle guard + cache --- src/autointent/_deps.py | 42 ++++++++++++++++++++++++++++++++++++ tests/test_deps.py | 48 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index dcb1cc199..e7c53e9d8 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -10,6 +10,7 @@ from __future__ import annotations +from functools import cache from importlib import metadata from packaging.requirements import Requirement @@ -61,3 +62,44 @@ def _iter_extra_reqs(dist: str, extra: str) -> list[Requirement]: if marker.evaluate({"extra": target}) and not marker.evaluate({"extra": ""}): result.append(req) return result + + +def _resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]: + """Recursively collect every leaf requirement activated by ``dist[extra]``. + + Each activated requirement is returned for version checking, and any nested + extras it declares (e.g. ``transformers[torch]``) are resolved in turn. + + Args: + dist: Distribution name to start from. + extra: Extra name to resolve. + seen: Visited ``(dist, extra)`` pairs, used to break dependency cycles. + + Returns: + The flattened list of requirements to validate. + """ + key = (str(canonicalize_name(dist)), str(canonicalize_name(extra))) + if key in seen: + return [] + seen.add(key) + + leaves: list[Requirement] = [] + for req in _iter_extra_reqs(dist, extra): + leaves.append(req) + for nested in req.extras: + leaves.extend(_resolve(req.name, nested, seen)) + return leaves + + +@cache +def _resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]: + """Memoized :func:`_resolve`; the metadata graph shape is stable per process. + + Args: + dist: Distribution name to start from. + extra: Extra name to resolve. + + Returns: + The resolved requirements as an immutable tuple. + """ + return tuple(_resolve(dist, extra, set())) diff --git a/tests/test_deps.py b/tests/test_deps.py index 62cab7bc2..712f2587a 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -1,6 +1,7 @@ import re from importlib import metadata +import pytest from packaging.requirements import Requirement import autointent._deps as deps @@ -74,3 +75,50 @@ def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): ) reqs = deps._iter_extra_reqs("autointent", "catboost") assert {r.name for r in reqs} == {"catboost"} + + +@pytest.fixture(autouse=True) +def _clear_resolve_cache(): + deps._resolve_cached.cache_clear() + yield + deps._resolve_cached.cache_clear() + + +def test_resolve_recurses_into_nested_extra(monkeypatch): + _patch_metadata( + monkeypatch, + { + "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], + "transformers": [ + "torch>=2.2 ; extra == 'torch'", + "accelerate>=0.26.0 ; extra == 'torch'", + ], + }, + {}, + ) + reqs = deps._resolve("autointent", "transformers", set()) + assert {r.name for r in reqs} == {"transformers", "torch", "accelerate"} + + +def test_resolve_terminates_on_cycle(monkeypatch): + _patch_metadata( + monkeypatch, + {"pkg": [ + "pkg[b]>=1.0 ; extra == 'a'", + "pkg[a]>=1.0 ; extra == 'b'", + ]}, + {}, + ) + reqs = deps._resolve("pkg", "a", set()) + assert {r.name for r in reqs} == {"pkg"} + + +def test_resolve_cached_returns_tuple(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, + {}, + ) + result = deps._resolve_cached("autointent", "catboost") + assert isinstance(result, tuple) + assert {r.name for r in result} == {"catboost"} From 6057976c3cebfa3f37b9c8432ca28b4a3fdee674 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:33:00 +0300 Subject: [PATCH 07/16] feat(deps): add require(extra) guard with aggregated, versioned errors (#322) --- src/autointent/_deps.py | 46 ++++++++++++++++++++++++++++++ tests/test_deps.py | 63 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index e7c53e9d8..969dc9608 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -103,3 +103,49 @@ def _resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]: The resolved requirements as an immutable tuple. """ return tuple(_resolve(dist, extra, set())) + + +def _provides_extras(dist: str) -> set[str]: + """Return the normalized set of extras declared by ``dist``. + + Args: + dist: Distribution name whose metadata is read. + + Returns: + Normalized extra names from the distribution's ``Provides-Extra`` metadata. + """ + md = metadata.metadata(dist) + return {str(canonicalize_name(e)) for e in (md.get_all("Provides-Extra") or [])} + + +def require(extra: str, *, dist: str = _DIST) -> None: + """Ensure every dependency of an ``autointent`` extra is installed and current. + + Args: + extra: The extra to validate, e.g. ``"transformers"``. + dist: Distribution that declares the extra. Defaults to ``"autointent"``. + + Raises: + ValueError: If ``dist`` declares no such ``extra`` (typically a typo). + ImportError: If any required dependency is missing or its installed version + does not satisfy the constraint declared in the metadata. + """ + known = _provides_extras(dist) + if str(canonicalize_name(extra)) not in known: + msg = f"'{dist}' declares no extra '{extra}'. Known extras: {', '.join(sorted(known))}." + raise ValueError(msg) + + problems: list[str] = [] + for req in _resolve_cached(dist, extra): + problem = _check(req) + if problem is not None and problem not in problems: + problems.append(problem) + + if problems: + bullets = "\n".join(f" - {p}" for p in problems) + msg = ( + f"Feature requires extra '{extra}', but dependencies are missing or outdated:\n" + f"{bullets}\n" + f"Install with: pip install '{dist}[{extra}]'" + ) + raise ImportError(msg) diff --git a/tests/test_deps.py b/tests/test_deps.py index 712f2587a..afe9a9322 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -122,3 +122,66 @@ def test_resolve_cached_returns_tuple(monkeypatch): result = deps._resolve_cached("autointent", "catboost") assert isinstance(result, tuple) assert {r.name for r in result} == {"catboost"} + + +def test_require_passes_when_all_present(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {"catboost": "1.5.0"}, + ) + deps.require("catboost") # must not raise + + +def test_require_raises_for_missing_leaf(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {}, + ) + with pytest.raises(ImportError) as exc: + deps.require("catboost") + text = str(exc.value) + assert "catboost" in text + assert "not installed" in text + assert "pip install 'autointent[catboost]'" in text + + +def test_require_raises_for_outdated_version(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, + {"catboost": "1.0.0"}, + ) + with pytest.raises(ImportError) as exc: + deps.require("catboost") + assert "installed: 1.0.0" in str(exc.value) + + +def test_require_detects_missing_nested_accelerate(monkeypatch): + # Regression for #322: accelerate lives in transformers' own [torch] extra, + # so a transformers-present-but-accelerate-absent env must still be flagged. + _patch_metadata( + monkeypatch, + { + "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], + "transformers": [ + "torch>=2.2 ; extra == 'torch'", + "accelerate>=0.26.0 ; extra == 'torch'", + ], + }, + {"transformers": "4.49.0", "torch": "2.2.0"}, # accelerate absent + ) + with pytest.raises(ImportError) as exc: + deps.require("transformers") + assert "accelerate" in str(exc.value) + + +def test_require_rejects_unknown_extra(monkeypatch): + _patch_metadata( + monkeypatch, + {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, + {"catboost": "1.5.0"}, + ) + with pytest.raises(ValueError, match="no extra 'transfomers'"): + deps.require("transfomers") # typo From 8c023822daaf40f594f6df59473a19256806d63e Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:39:39 +0300 Subject: [PATCH 08/16] refactor(deps): migrate all require() call sites to extra-name guard (#322) --- src/autointent/_dump_tools/unit_dumpers.py | 20 +++++++++---------- src/autointent/_utils.py | 20 ------------------- src/autointent/_wrappers/embedder/openai.py | 6 +++--- .../embedder/sentence_transformers.py | 11 +++++----- src/autointent/_wrappers/embedder/vllm.py | 4 ++-- src/autointent/_wrappers/ranker.py | 4 ++-- src/autointent/generation/_generator.py | 4 ++-- src/autointent/modules/scoring/_bert.py | 4 ++-- .../scoring/_catboost/catboost_scorer.py | 4 ++-- src/autointent/modules/scoring/_lora/lora.py | 4 ++-- .../modules/scoring/_ptuning/ptuning.py | 4 ++-- .../advanced/02_embedder_configuration.py | 2 +- 12 files changed, 33 insertions(+), 54 deletions(-) diff --git a/src/autointent/_dump_tools/unit_dumpers.py b/src/autointent/_dump_tools/unit_dumpers.py index 69b5a276d..2b1c201bb 100644 --- a/src/autointent/_dump_tools/unit_dumpers.py +++ b/src/autointent/_dump_tools/unit_dumpers.py @@ -13,7 +13,7 @@ from sklearn.base import BaseEstimator from autointent import Embedder, Ranker, VectorIndex -from autointent._utils import require +from autointent._deps import require from autointent._wrappers import BaseTorchModule from autointent.schemas import TagsList @@ -225,8 +225,8 @@ def dump(obj: PeftModel, path: Path, exists_ok: bool) -> None: @staticmethod def load(path: Path, **kwargs: Any) -> PeftModel: # noqa: ANN401 - require("peft", extra="peft") - require("transformers", extra="transformers") + require("peft") + require("transformers") import peft import transformers @@ -245,7 +245,7 @@ def load(path: Path, **kwargs: Any) -> PeftModel: # noqa: ANN401 @classmethod def check_isinstance(cls, obj: Any) -> bool: # noqa: ANN401 try: - require("peft", extra="peft") + require("peft") import peft return isinstance(obj, peft.PeftModel) @@ -263,7 +263,7 @@ def dump(obj: PreTrainedModel, path: Path, exists_ok: bool) -> None: @staticmethod def load(path: Path, **kwargs: Any) -> PreTrainedModel: # noqa: ANN401 - require("transformers", extra="transformers") + require("transformers") import transformers return transformers.AutoModelForSequenceClassification.from_pretrained(path) # type: ignore[no-any-return] @@ -271,7 +271,7 @@ def load(path: Path, **kwargs: Any) -> PreTrainedModel: # noqa: ANN401 @classmethod def check_isinstance(cls, obj: Any) -> bool: # noqa: ANN401 try: - require("transformers", extra="transformers") + require("transformers") import transformers return isinstance(obj, transformers.PreTrainedModel) @@ -289,7 +289,7 @@ def dump(obj: PreTrainedTokenizer | PreTrainedTokenizerFast, path: Path, exists_ @staticmethod def load(path: Path, **kwargs: Any) -> PreTrainedTokenizer | PreTrainedTokenizerFast: # noqa: ANN401 - require("transformers", extra="transformers") + require("transformers") import transformers return transformers.AutoTokenizer.from_pretrained(path) # type: ignore[no-any-return,no-untyped-call] @@ -297,7 +297,7 @@ def load(path: Path, **kwargs: Any) -> PreTrainedTokenizer | PreTrainedTokenizer @classmethod def check_isinstance(cls, obj: Any) -> bool: # noqa: ANN401 try: - require("transformers", extra="transformers") + require("transformers") import transformers return isinstance(obj, transformers.PreTrainedTokenizer | transformers.PreTrainedTokenizerFast) @@ -342,7 +342,7 @@ def dump(obj: CatBoostClassifier, path: Path, exists_ok: bool) -> None: # noqa: @staticmethod def load(path: Path, **kwargs: Any) -> CatBoostClassifier: # noqa: ANN401 - require("catboost", extra="catboost") + require("catboost") from catboost import CatBoostClassifier model = CatBoostClassifier() @@ -352,7 +352,7 @@ def load(path: Path, **kwargs: Any) -> CatBoostClassifier: # noqa: ANN401 @classmethod def check_isinstance(cls, obj: Any) -> bool: # noqa: ANN401 try: - require("catboost", extra="catboost") + require("catboost") from catboost import CatBoostClassifier return isinstance(obj, CatBoostClassifier) diff --git a/src/autointent/_utils.py b/src/autointent/_utils.py index 4ecbb6e4d..c8a8614b7 100644 --- a/src/autointent/_utils.py +++ b/src/autointent/_utils.py @@ -1,6 +1,5 @@ """Utils.""" -import importlib from typing import TypeVar import torch @@ -28,22 +27,3 @@ def detect_device() -> str: return "cpu" -def require(dependency: str, extra: str | None = None) -> None: - """Try to import dependency, raise informative ImportError if missing. - - Args: - dependency: The name of the module to import - extra: Optional extra package name for pip install instructions - - Returns: - The imported module - - Raises: - ImportError: If the dependency is not installed - """ - try: - importlib.import_module(dependency) - except ImportError as e: - extra_info = f" Install with `pip install autointent[{extra}]`." if extra else "" - msg = f"Missing dependency '{dependency}' required for this feature.{extra_info}" - raise ImportError(msg) from e diff --git a/src/autointent/_wrappers/embedder/openai.py b/src/autointent/_wrappers/embedder/openai.py index 9eada4f05..c1ae6ee3f 100644 --- a/src/autointent/_wrappers/embedder/openai.py +++ b/src/autointent/_wrappers/embedder/openai.py @@ -12,8 +12,8 @@ import numpy.typing as npt import torch +from autointent._deps import require from autointent._hash import Hasher -from autointent._utils import require from autointent.configs._embedder import OpenaiEmbeddingConfig from .base import BaseEmbeddingBackend @@ -77,7 +77,7 @@ def _openai_api_error_message(exc: BaseException, *, batch_size: int) -> str: def _tiktoken_encoding_for_embedding_model(model_name: str) -> Encoding: """Resolve tiktoken encoding for batch sizing; fallback for unknown provider model ids.""" - require("tiktoken", "openai") + require("openai") import tiktoken try: @@ -110,7 +110,7 @@ def __init__(self, config: OpenaiEmbeddingConfig) -> None: Args: config: Configuration for OpenAI embeddings. """ - require("openai", "openai") + require("openai") self.config = config self._event_loop: asyncio.AbstractEventLoop | None = None diff --git a/src/autointent/_wrappers/embedder/sentence_transformers.py b/src/autointent/_wrappers/embedder/sentence_transformers.py index cbb921409..772737fed 100644 --- a/src/autointent/_wrappers/embedder/sentence_transformers.py +++ b/src/autointent/_wrappers/embedder/sentence_transformers.py @@ -14,8 +14,8 @@ from datasets import Dataset from sklearn.model_selection import train_test_split +from autointent._deps import require from autointent._hash import Hasher -from autointent._utils import require from autointent.configs._embedder import SentenceTransformerEmbeddingConfig from .base import BaseEmbeddingBackend @@ -42,7 +42,7 @@ def _set_training_seed(seed: int) -> None: if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) - require("transformers", extra="transformers") + require("transformers") from transformers import set_seed set_seed(seed) @@ -130,7 +130,7 @@ def _load_model(self) -> SentenceTransformer: """Load sentence transformers model to device.""" if self._model is None: # Lazy import sentence-transformers - require("sentence_transformers", extra="sentence-transformers") + require("sentence-transformers") from sentence_transformers import SentenceTransformer res = SentenceTransformer( @@ -294,9 +294,8 @@ def train(self, utterances: list[str], labels: ListOfLabels, config: EmbedderFin _set_training_seed(config.seed) # Lazy import sentence-transformers training components (only needed for fine-tuning) - require("sentence_transformers", extra="sentence-transformers") - require("transformers", extra="transformers") - require("accelerate", extra="transformers") + require("sentence-transformers") + require("transformers") from sentence_transformers import ( SentenceTransformerTrainer, SentenceTransformerTrainingArguments, diff --git a/src/autointent/_wrappers/embedder/vllm.py b/src/autointent/_wrappers/embedder/vllm.py index 170c73aa5..691686ca3 100644 --- a/src/autointent/_wrappers/embedder/vllm.py +++ b/src/autointent/_wrappers/embedder/vllm.py @@ -9,8 +9,8 @@ import numpy as np import torch +from autointent._deps import require from autointent._hash import Hasher -from autointent._utils import require from autointent.configs._embedder import VllmEmbeddingConfig from .base import BaseEmbeddingBackend @@ -44,7 +44,7 @@ def __init__(self, config: VllmEmbeddingConfig) -> None: def _load_model(self) -> LLM: """Lazy-load the vLLM LLM engine on first use.""" if self._model is None: - require("vllm", extra="vllm") + require("vllm") from vllm import LLM kwargs = { diff --git a/src/autointent/_wrappers/ranker.py b/src/autointent/_wrappers/ranker.py index 5e3826693..8e73f0be5 100644 --- a/src/autointent/_wrappers/ranker.py +++ b/src/autointent/_wrappers/ranker.py @@ -20,7 +20,7 @@ from sklearn.linear_model import LogisticRegressionCV from torch import nn -from autointent._utils import require +from autointent._deps import require from autointent.configs import CrossEncoderConfig from autointent.custom_types import RerankedItem @@ -118,7 +118,7 @@ def __init__( output_range: Range of the output probabilities ([0, 1] for sigmoid, [-1, 1] for tanh) """ # Lazy import sentence-transformers - require("sentence_transformers", extra="sentence-transformers") + require("sentence-transformers") from sentence_transformers import CrossEncoder self.config = CrossEncoderConfig.from_search_config(cross_encoder_config) diff --git a/src/autointent/generation/_generator.py b/src/autointent/generation/_generator.py index 3a919c656..44d1dcdee 100644 --- a/src/autointent/generation/_generator.py +++ b/src/autointent/generation/_generator.py @@ -11,7 +11,7 @@ from dotenv import load_dotenv from pydantic import BaseModel, ValidationError -from autointent._utils import require +from autointent._deps import require from autointent.generation.chat_templates import Message, Role from ._cache import StructuredOutputCache @@ -139,7 +139,7 @@ def __init__( client_params: Additional parameters for client. **generation_params: Additional generation parameters to override defaults passed to OpenAI completions API. """ - require("openai", "openai") + require("openai") import openai base_url = base_url or os.getenv("OPENAI_BASE_URL") diff --git a/src/autointent/modules/scoring/_bert.py b/src/autointent/modules/scoring/_bert.py index d5da50c79..816fd863e 100644 --- a/src/autointent/modules/scoring/_bert.py +++ b/src/autointent/modules/scoring/_bert.py @@ -13,7 +13,7 @@ from autointent import Context from autointent._callbacks import REPORTERS_NAMES -from autointent._utils import require +from autointent._deps import require from autointent.configs import EarlyStoppingConfig, HFModelConfig from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL from autointent.modules.base import BaseScorer @@ -88,7 +88,7 @@ def __init__( early_stopping_config: EarlyStoppingConfig | dict[str, Any] | None = None, print_progress: bool = False, ) -> None: - require("transformers", "transformers") + require("transformers") self.classification_model_config = HFModelConfig.from_search_config(classification_model_config) self.num_train_epochs = num_train_epochs self.batch_size = batch_size diff --git a/src/autointent/modules/scoring/_catboost/catboost_scorer.py b/src/autointent/modules/scoring/_catboost/catboost_scorer.py index cb1770374..82eba5a40 100644 --- a/src/autointent/modules/scoring/_catboost/catboost_scorer.py +++ b/src/autointent/modules/scoring/_catboost/catboost_scorer.py @@ -11,7 +11,7 @@ from pydantic import PositiveInt from autointent import Context, Embedder -from autointent._utils import require +from autointent._deps import require from autointent.configs import EmbedderConfig, TaskTypeEnum, initialize_embedder_config from autointent.custom_types import FloatFromZeroToOne, ListOfLabels from autointent.modules.base import BaseScorer @@ -110,7 +110,7 @@ def __init__( **catboost_kwargs: Any, # noqa: ANN401 ) -> None: # Lazy import catboost - require("catboost", extra="catboost") + require("catboost") self.val_fraction = val_fraction self.early_stopping_rounds = early_stopping_rounds diff --git a/src/autointent/modules/scoring/_lora/lora.py b/src/autointent/modules/scoring/_lora/lora.py index 2b0030815..f310bd3eb 100644 --- a/src/autointent/modules/scoring/_lora/lora.py +++ b/src/autointent/modules/scoring/_lora/lora.py @@ -6,8 +6,8 @@ from typing import TYPE_CHECKING, Any, Literal from autointent import Context +from autointent._deps import require from autointent._dump_tools import Dumper -from autointent._utils import require from autointent.configs import EarlyStoppingConfig, HFModelConfig from autointent.modules.scoring._bert import BertScorer @@ -75,7 +75,7 @@ def __init__( **lora_kwargs: Any, # noqa: ANN401 ) -> None: # Lazy import peft - require("peft", extra="peft") + require("peft") from peft import LoraConfig # early stopping doesn't work with lora for now https://github.com/huggingface/transformers/issues/38130 diff --git a/src/autointent/modules/scoring/_ptuning/ptuning.py b/src/autointent/modules/scoring/_ptuning/ptuning.py index 4edb86749..162ee502d 100644 --- a/src/autointent/modules/scoring/_ptuning/ptuning.py +++ b/src/autointent/modules/scoring/_ptuning/ptuning.py @@ -8,8 +8,8 @@ from pydantic import PositiveInt from autointent import Context +from autointent._deps import require from autointent._dump_tools import Dumper -from autointent._utils import require from autointent.configs import EarlyStoppingConfig, HFModelConfig from autointent.modules.scoring._bert import BertScorer @@ -73,7 +73,7 @@ def __init__( # noqa: PLR0913 **ptuning_kwargs: Any, # noqa: ANN401 ) -> None: # Lazy import peft - require("peft", extra="peft") + require("peft") from peft import PromptEncoderConfig, PromptEncoderReparameterizationType, TaskType diff --git a/user_guides/advanced/02_embedder_configuration.py b/user_guides/advanced/02_embedder_configuration.py index f788e3334..1dd436ecf 100644 --- a/user_guides/advanced/02_embedder_configuration.py +++ b/user_guides/advanced/02_embedder_configuration.py @@ -19,7 +19,7 @@ pip install "autointent[sentence-transformers]" ``` -Other backends need their own extras, for example `autointent[openai]` or `autointent[vllm]`, as shown in the sections below. When a backend package is missing, code paths that need it typically call `autointent._utils.require`, which raises an `ImportError` that includes the matching `pip install autointent[]` hint. +Other backends need their own extras, for example `autointent[openai]` or `autointent[vllm]`, as shown in the sections below. When a backend package is missing, code paths that need it typically call `autointent._deps.require`, which raises an `ImportError` that includes the matching `pip install autointent[]` hint. ## Configuration Approaches From a6d0e614fe2d5b827a862bd1961d95d4622be639 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:43:43 +0300 Subject: [PATCH 09/16] test(deps): smoke-test require resolution against real metadata --- tests/test_deps.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_deps.py b/tests/test_deps.py index afe9a9322..26673ad6a 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -185,3 +185,8 @@ def test_require_rejects_unknown_extra(monkeypatch): ) with pytest.raises(ValueError, match="no extra 'transfomers'"): deps.require("transfomers") # typo + + +def test_resolve_reads_real_autointent_metadata(): + reqs = deps._resolve_cached("autointent", "catboost") + assert any(r.name == "catboost" for r in reqs) From e5e45b4fc73e6b65f22d5654be1d2a9cbbd4134d Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 00:48:39 +0300 Subject: [PATCH 10/16] test(deps): add from __future__ import annotations to test module (#322) Co-Authored-By: Claude Opus 4.8 --- tests/test_deps.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_deps.py b/tests/test_deps.py index 26673ad6a..bcb6eb542 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re from importlib import metadata From 6889e8793b5480fcb7af8a584e7ddfbe121cfee8 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 01:05:06 +0300 Subject: [PATCH 11/16] fix(deps): type-annotate test_deps for mypy strict; patch importlib.metadata directly (#322) CI runs 'mypy src/autointent tests' under strict on py3.10; the test module needed full annotations and patching via the shared importlib.metadata module object (deps.metadata tripped no-implicit-reexport attr-defined). Co-Authored-By: Claude Opus 4.8 --- tests/test_deps.py | 54 ++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/test_deps.py b/tests/test_deps.py index bcb6eb542..5f77364b8 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -2,54 +2,62 @@ import re from importlib import metadata +from typing import TYPE_CHECKING import pytest from packaging.requirements import Requirement import autointent._deps as deps +if TYPE_CHECKING: + from collections.abc import Iterator + _EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""") class _FakeMeta: - def __init__(self, extras): + def __init__(self, extras: list[str]) -> None: self._extras = extras - def get_all(self, name, failobj=None): + def get_all(self, name: str, failobj: list[str] | None = None) -> list[str] | None: if name == "Provides-Extra": return list(self._extras) return failobj -def _patch_metadata(monkeypatch, requires_map, versions): +def _patch_metadata( + monkeypatch: pytest.MonkeyPatch, + requires_map: dict[str, list[str]], + versions: dict[str, str], +) -> None: """Patch importlib.metadata so deps.* sees a synthetic dependency graph. requires_map: {dist_name: [PEP 508 requirement string, ...]} versions: {dist_name: installed_version_string} (absent key => not installed) """ - def fake_requires(dist): + def fake_requires(dist: str) -> list[str]: return requires_map.get(dist, []) - def fake_version(name): + def fake_version(name: str) -> str: if name not in versions: raise metadata.PackageNotFoundError(name) return versions[name] - def fake_metadata(dist): + def fake_metadata(dist: str) -> _FakeMeta: extras = sorted({e for s in requires_map.get(dist, []) for e in _EXTRA_RE.findall(s)}) return _FakeMeta(extras) - monkeypatch.setattr(deps.metadata, "requires", fake_requires) - monkeypatch.setattr(deps.metadata, "version", fake_version) - monkeypatch.setattr(deps.metadata, "metadata", fake_metadata) + monkeypatch.setattr(metadata, "requires", fake_requires) + monkeypatch.setattr(metadata, "version", fake_version) + monkeypatch.setattr(metadata, "metadata", fake_metadata) -def test_check_returns_none_when_satisfied(monkeypatch): +def test_check_returns_none_when_satisfied(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata(monkeypatch, {}, {"catboost": "1.5.0"}) assert deps._check(Requirement("catboost>=1.2.8,<2.0.0")) is None -def test_check_reports_missing(monkeypatch): +def test_check_reports_missing(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata(monkeypatch, {}, {}) problem = deps._check(Requirement("catboost>=1.2.8")) assert problem is not None @@ -57,14 +65,14 @@ def test_check_reports_missing(monkeypatch): assert "not installed" in problem -def test_check_reports_outdated(monkeypatch): +def test_check_reports_outdated(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata(monkeypatch, {}, {"catboost": "1.0.0"}) problem = deps._check(Requirement("catboost>=1.2.8,<2.0.0")) assert problem is not None assert "installed: 1.0.0" in problem -def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): +def test_iter_extra_reqs_selects_only_extra_members(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": [ @@ -80,13 +88,13 @@ def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): @pytest.fixture(autouse=True) -def _clear_resolve_cache(): +def _clear_resolve_cache() -> Iterator[None]: deps._resolve_cached.cache_clear() yield deps._resolve_cached.cache_clear() -def test_resolve_recurses_into_nested_extra(monkeypatch): +def test_resolve_recurses_into_nested_extra(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, { @@ -102,7 +110,7 @@ def test_resolve_recurses_into_nested_extra(monkeypatch): assert {r.name for r in reqs} == {"transformers", "torch", "accelerate"} -def test_resolve_terminates_on_cycle(monkeypatch): +def test_resolve_terminates_on_cycle(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"pkg": [ @@ -115,7 +123,7 @@ def test_resolve_terminates_on_cycle(monkeypatch): assert {r.name for r in reqs} == {"pkg"} -def test_resolve_cached_returns_tuple(monkeypatch): +def test_resolve_cached_returns_tuple(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, @@ -126,7 +134,7 @@ def test_resolve_cached_returns_tuple(monkeypatch): assert {r.name for r in result} == {"catboost"} -def test_require_passes_when_all_present(monkeypatch): +def test_require_passes_when_all_present(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, @@ -135,7 +143,7 @@ def test_require_passes_when_all_present(monkeypatch): deps.require("catboost") # must not raise -def test_require_raises_for_missing_leaf(monkeypatch): +def test_require_raises_for_missing_leaf(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, @@ -149,7 +157,7 @@ def test_require_raises_for_missing_leaf(monkeypatch): assert "pip install 'autointent[catboost]'" in text -def test_require_raises_for_outdated_version(monkeypatch): +def test_require_raises_for_outdated_version(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, @@ -160,7 +168,7 @@ def test_require_raises_for_outdated_version(monkeypatch): assert "installed: 1.0.0" in str(exc.value) -def test_require_detects_missing_nested_accelerate(monkeypatch): +def test_require_detects_missing_nested_accelerate(monkeypatch: pytest.MonkeyPatch) -> None: # Regression for #322: accelerate lives in transformers' own [torch] extra, # so a transformers-present-but-accelerate-absent env must still be flagged. _patch_metadata( @@ -179,7 +187,7 @@ def test_require_detects_missing_nested_accelerate(monkeypatch): assert "accelerate" in str(exc.value) -def test_require_rejects_unknown_extra(monkeypatch): +def test_require_rejects_unknown_extra(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, @@ -189,6 +197,6 @@ def test_require_rejects_unknown_extra(monkeypatch): deps.require("transfomers") # typo -def test_resolve_reads_real_autointent_metadata(): +def test_resolve_reads_real_autointent_metadata() -> None: reqs = deps._resolve_cached("autointent", "catboost") assert any(r.name == "catboost" for r in reqs) From 8bf7b09309db48c277ea38db846a70d10e8ed654 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 22:39:24 +0300 Subject: [PATCH 12/16] fix critical error with nested extras' requirements iteration --- src/autointent/_deps.py | 40 ++++++++++++++++++++++++++++++++++------ tests/test_deps.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index 969dc9608..fd7953606 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -41,24 +41,52 @@ def _check(req: Requirement) -> str | None: def _iter_extra_reqs(dist: str, extra: str) -> list[Requirement]: """Return the requirements of ``dist`` activated by ``extra``. - A requirement is included only when its marker is satisfied *because* of the - extra: it must evaluate true with the extra set and false with no extra. This - excludes base dependencies that merely carry an environment marker. - Args: dist: Distribution name whose metadata is read. extra: Extra name whose dependencies are wanted. Returns: - The parsed requirements activated by ``extra`` in the current environment. + The parsed requirements activated by ``extra`` in the current environment, + or an empty list if ``dist`` is not installed (its metadata is unavailable). """ target = str(canonicalize_name(extra)) result: list[Requirement] = [] - for spec in metadata.requires(dist) or []: + try: + reqs = metadata.requires(dist) + except metadata.PackageNotFoundError: + # `dist` itself isn't installed, so we can't read its nested-extra + # requirements. That's fine: the parent requirement that led us to recurse + # here (e.g. `transformers[torch]`) was already collected by the caller and + # `_check` will flag it as "not installed", producing the proper aggregated + # ImportError with the install hint -- rather than letting a raw + # PackageNotFoundError leak out of the resolver. + return [] + for spec in reqs or []: req = Requirement(spec) + # `req.marker` is the parsed `;` clause of the PEP 508 requirement (a + # packaging Marker), or None when the requirement has no `;` clause. There + # are three cases: + # (1) no marker -> an unconditional base dependency; + # (2) a marker that references `extra` -> belongs to an extra; + # (3) a marker with only environment conditions (e.g. `python_version < "3.9"`) + # -> still a base dependency, just platform-conditional. + # So "has a marker" does NOT mean "belongs to an extra"; marker = req.marker + # Here we cancel out case (1) if marker is None: continue + # `marker.evaluate(env)` resolves the whole boolean expression to a bool, + # filling any keys we omit (python_version, sys_platform, ...) from the + # running interpreter. A single `evaluate({"extra": target})` is not enough + # to prove membership: an env-conditional base dep also passes it, because + # its truth comes from the environment and the `extra` key is ignored. + # The discriminator is the second evaluation: a *true* extra dependency + # flips active -> inactive when the extra is removed, whereas a base dep is + # unaffected. So "active with the extra AND inactive with no extra" means + # "active *because of* this extra", which keeps extra members and drops + # base deps. We always pass `extra` explicitly (`""` = base install, no + # extras) since a marker that references `extra` can't be evaluated without it. + # So here we cancel out case (3) if marker.evaluate({"extra": target}) and not marker.evaluate({"extra": ""}): result.append(req) return result diff --git a/tests/test_deps.py b/tests/test_deps.py index 5f77364b8..c3f9501fb 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -36,7 +36,13 @@ def _patch_metadata( versions: {dist_name: installed_version_string} (absent key => not installed) """ def fake_requires(dist: str) -> list[str]: - return requires_map.get(dist, []) + # Mirror the real importlib.metadata.requires: a dist with no metadata + # (i.e. not installed) raises PackageNotFoundError rather than returning []. + # A dist that is installed but has no requirements is modelled by an empty + # list in requires_map. + if dist not in requires_map: + raise metadata.PackageNotFoundError(dist) + return requires_map[dist] def fake_version(name: str) -> str: if name not in versions: @@ -187,6 +193,30 @@ def test_require_detects_missing_nested_accelerate(monkeypatch: pytest.MonkeyPat assert "accelerate" in str(exc.value) +def test_require_reports_extra_package_entirely_missing(monkeypatch: pytest.MonkeyPatch) -> None: + # Bare-install path: the extra's top-level package isn't installed at all, so + # recursing into its nested [torch] extra would read missing metadata. The + # resolver must NOT leak a raw PackageNotFoundError; instead the parent + # `transformers[torch]` requirement is flagged as missing with the install hint. + _patch_metadata( + monkeypatch, + {"autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"]}, + {}, # transformers (and everything else) absent + ) + with pytest.raises(ImportError) as exc: + deps.require("transformers") + text = str(exc.value) + assert "transformers" in text + assert "not installed" in text + assert "pip install 'autointent[transformers]'" in text + + +def test_iter_extra_reqs_returns_empty_for_uninstalled_dist(monkeypatch: pytest.MonkeyPatch) -> None: + # The metadata read for a not-installed dist must be swallowed and yield []. + _patch_metadata(monkeypatch, {}, {}) + assert deps._iter_extra_reqs("not-installed", "torch") == [] + + def test_require_rejects_unknown_extra(monkeypatch: pytest.MonkeyPatch) -> None: _patch_metadata( monkeypatch, From 27e1e26c1eb50da3a7da269587bcac50bf6b1395 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 23:02:16 +0300 Subject: [PATCH 13/16] tighten tests --- tests/test_deps.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_deps.py b/tests/test_deps.py index c3f9501fb..74257cd76 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -228,5 +228,19 @@ def test_require_rejects_unknown_extra(monkeypatch: pytest.MonkeyPatch) -> None: def test_resolve_reads_real_autointent_metadata() -> None: + # catboost is deliberately chosen: a flat, recursion-free extra, so this real- + # metadata wiring check is deterministic regardless of what CI installs. reqs = deps._resolve_cached("autointent", "catboost") assert any(r.name == "catboost" for r in reqs) + assert all(not r.extras for r in reqs) # documents the "no nested extra" premise + + +def test_resolve_every_real_extra_without_raising() -> None: + # Walk every extra autointent actually declares (incl. transformers[torch], + # which recurses into a nested extra) against real metadata. The resolver must + # never raise, regardless of which optional packages CI installed -- this is the + # real-metadata guard for the not-installed-nested-dist fix. + extras = deps._provides_extras("autointent") + assert extras # sanity: metadata wiring returns *something* + for extra in extras: + deps._resolve_cached("autointent", extra) # must not raise From 34f3a0aaa01c82c2251ecdf9affcdff4f1196d2d Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 23:07:06 +0300 Subject: [PATCH 14/16] use literal with actual autointent's extras enumerated --- src/autointent/_deps.py | 25 ++++++++++++++++++++++++- tests/test_deps.py | 14 ++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index fd7953606..c3661b049 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -12,12 +12,35 @@ from functools import cache from importlib import metadata +from typing import Literal from packaging.requirements import Requirement from packaging.utils import canonicalize_name _DIST = "autointent" +# Names of the optional-dependency extras autointent declares, mirrored from the +# installed ``Provides-Extra`` metadata (and thus pyproject's +# [project.optional-dependencies]). Typing ``require``'s parameter with this makes +# mypy reject misspelled extra names at call sites; the runtime check in ``require`` +# stays the source of truth (mypy isn't run at runtime, and ``dist`` overrides or +# dynamic calls bypass static typing). Kept in sync with the real metadata by +# tests/test_deps.py::test_extra_literal_matches_real_metadata. +Extra = Literal[ + "catboost", + "codecarbon", + "dspy", + "fastapi", + "fastmcp", + "openai", + "opensearch", + "peft", + "sentence-transformers", + "transformers", + "vllm", + "wandb", +] + def _check(req: Requirement) -> str | None: """Check a single requirement against the installed environment. @@ -146,7 +169,7 @@ def _provides_extras(dist: str) -> set[str]: return {str(canonicalize_name(e)) for e in (md.get_all("Provides-Extra") or [])} -def require(extra: str, *, dist: str = _DIST) -> None: +def require(extra: Extra, *, dist: str = _DIST) -> None: """Ensure every dependency of an ``autointent`` extra is installed and current. Args: diff --git a/tests/test_deps.py b/tests/test_deps.py index 74257cd76..a5c74a716 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -2,7 +2,7 @@ import re from importlib import metadata -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, get_args import pytest from packaging.requirements import Requirement @@ -223,8 +223,10 @@ def test_require_rejects_unknown_extra(monkeypatch: pytest.MonkeyPatch) -> None: {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, {"catboost": "1.5.0"}, ) + # "transfomers" is intentionally invalid (typo) to exercise the runtime guard; + # the type: ignore is required because `Extra` now rejects it at type-check time. with pytest.raises(ValueError, match="no extra 'transfomers'"): - deps.require("transfomers") # typo + deps.require("transfomers") # type: ignore[arg-type] def test_resolve_reads_real_autointent_metadata() -> None: @@ -235,6 +237,14 @@ def test_resolve_reads_real_autointent_metadata() -> None: assert all(not r.extras for r in reqs) # documents the "no nested extra" premise +def test_extra_literal_matches_real_metadata() -> None: + # The `Extra` Literal is a hand-maintained mirror of the real Provides-Extra + # metadata. This fails if pyproject gains/loses an extra without the Literal being + # updated (or vice versa), keeping the static type honest and preventing the + # manual-sync drift the metadata-driven design otherwise removes. + assert set(get_args(deps.Extra)) == deps._provides_extras("autointent") + + def test_resolve_every_real_extra_without_raising() -> None: # Walk every extra autointent actually declares (incl. transformers[torch], # which recurses into a nested extra) against real metadata. The resolver must From d822dfc7296a46ba1c811ece6e0ab26e7bfe259b Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 23:27:42 +0300 Subject: [PATCH 15/16] remove specs --- .../2026-06-22-require-extra-resolver.md | 689 ------------------ ...026-06-22-require-extra-resolver-design.md | 170 ----- 2 files changed, 859 deletions(-) delete mode 100644 docs/superpowers/plans/2026-06-22-require-extra-resolver.md delete mode 100644 docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md diff --git a/docs/superpowers/plans/2026-06-22-require-extra-resolver.md b/docs/superpowers/plans/2026-06-22-require-extra-resolver.md deleted file mode 100644 index a47be0a27..000000000 --- a/docs/superpowers/plans/2026-06-22-require-extra-resolver.md +++ /dev/null @@ -1,689 +0,0 @@ -# Metadata-driven `require(extra)` Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Replace the import-based `require(dependency, extra)` guard with `require(extra)`, which reads installed package metadata to verify every dependency of an autointent extra — recursively, including nested extras like `transformers[torch] → accelerate` — is installed and version-satisfied. - -**Architecture:** A new focused module `src/autointent/_deps.py` resolves an extra's requirement graph via `importlib.metadata` (the build bakes `pyproject.toml`'s requirements into the wheel as metadata; the source file is not shipped) and validates each leaf with `packaging`. `require` moves there; all ~20 call sites pass just the extra name. Fixes #322 structurally — the missing `accelerate` is caught by recursing into `transformers`' own `torch` extra, with no hand-added guard. - -**Tech Stack:** Python 3.10+, `importlib.metadata` (stdlib), `packaging` (promoted to a core dependency), pytest with `monkeypatch`. - -## Global Constraints - -- **Lint:** `ruff` runs with `select = ["ALL"]`, line-length 120, target `py310`, **google docstring convention**. Every module/function needs a google-style docstring; every parameter and return needs a type annotation. Raise exceptions via a pre-assigned `msg` variable (satisfies `EM`/`TRY003`). -- **Types:** `mypy --strict` on **python 3.10**. `from __future__ import annotations` at the top of every new `.py`. When building a typed `set[str]`/`dict` from `packaging.utils.canonicalize_name` (which returns `NormalizedName`), wrap in `str(...)` — `set` is invariant so `set[NormalizedName]` is not assignable to `set[str]`. -- **New core dependency:** add `packaging (>=23.2)` to `[project].dependencies` (no upper cap — avoids resolver conflicts; it is currently present only transitively, the same mistake #322 is about). -- **Testing:** Do **NOT** run the full pytest suite locally — it can freeze the machine. The new `tests/test_deps.py` is pure Python (monkeypatched metadata, no model loading) and is the **only** suite you may run locally, via `pytest tests/test_deps.py -q`. Full-suite + call-site migration verification goes through **CI** (push branch + coverage dispatch). `ruff` and `mypy` may be run locally. -- **Do not** commit `uv.lock` (gitignored by design; CI re-solves deps). -- **Extra names are passed verbatim as declared in `pyproject.toml`** (e.g. `"sentence-transformers"` with a hyphen). - ---- - -### Task 1: Create `_deps.py` with `_check` + add `packaging` core dep - -**Files:** -- Modify: `pyproject.toml` (add `packaging` to `[project].dependencies`) -- Create: `src/autointent/_deps.py` -- Create: `tests/test_deps.py` - -**Interfaces:** -- Produces: `_check(req: packaging.requirements.Requirement) -> str | None` — returns a one-line problem description if the dist named by `req` is not installed or its installed version is outside `req.specifier`, else `None`. - -- [ ] **Step 1: Add `packaging` to core dependencies** - -In `pyproject.toml`, inside `[project]` `dependencies = [ ... ]`, add one line (keep existing entries): - -```toml - "packaging (>=23.2)", -``` - -- [ ] **Step 2: Create the module with its docstring, imports, and `_check`** - -Create `src/autointent/_deps.py`: - -```python -"""Validate optional-extra dependencies from installed package metadata. - -The :func:`require` guard checks that every dependency of an ``autointent`` extra -is installed and version-satisfied. It reads the metadata that the build baked into -the installed distribution (via :mod:`importlib.metadata`) rather than the source -``pyproject.toml``, which is not shipped in the wheel. Nested extras are resolved -recursively, so e.g. the ``transformers`` extra (``transformers[torch]``) -transitively requires ``accelerate`` and that is checked too. -""" - -from __future__ import annotations - -from importlib import metadata - -from packaging.requirements import Requirement - -_DIST = "autointent" - - -def _check(req: Requirement) -> str | None: - """Check a single requirement against the installed environment. - - Args: - req: The parsed requirement to validate. - - Returns: - A human-readable problem description if the distribution is missing or its - installed version does not satisfy ``req.specifier``; ``None`` otherwise. - """ - try: - installed = metadata.version(req.name) - except metadata.PackageNotFoundError: - return f"{req.name}{req.specifier} (not installed)" - if req.specifier and not req.specifier.contains(installed, prereleases=True): - return f"{req.name}{req.specifier} (installed: {installed})" - return None -``` - -- [ ] **Step 3: Write the failing test and shared metadata-faking helper** - -Create `tests/test_deps.py`: - -```python -import re -from importlib import metadata - -from packaging.requirements import Requirement - -import autointent._deps as deps - -_EXTRA_RE = re.compile(r"""extra\s*==\s*['"]([^'"]+)['"]""") - - -class _FakeMeta: - def __init__(self, extras): - self._extras = extras - - def get_all(self, name, failobj=None): - if name == "Provides-Extra": - return list(self._extras) - return failobj - - -def _patch_metadata(monkeypatch, requires_map, versions): - """Patch importlib.metadata so deps.* sees a synthetic dependency graph. - - requires_map: {dist_name: [PEP 508 requirement string, ...]} - versions: {dist_name: installed_version_string} (absent key => not installed) - """ - def fake_requires(dist): - return requires_map.get(dist, []) - - def fake_version(name): - if name not in versions: - raise metadata.PackageNotFoundError(name) - return versions[name] - - def fake_metadata(dist): - extras = sorted({e for s in requires_map.get(dist, []) for e in _EXTRA_RE.findall(s)}) - return _FakeMeta(extras) - - monkeypatch.setattr(deps.metadata, "requires", fake_requires) - monkeypatch.setattr(deps.metadata, "version", fake_version) - monkeypatch.setattr(deps.metadata, "metadata", fake_metadata) - - -def test_check_returns_none_when_satisfied(monkeypatch): - _patch_metadata(monkeypatch, {}, {"catboost": "1.5.0"}) - assert deps._check(Requirement("catboost>=1.2.8,<2.0.0")) is None - - -def test_check_reports_missing(monkeypatch): - _patch_metadata(monkeypatch, {}, {}) - problem = deps._check(Requirement("catboost>=1.2.8")) - assert problem is not None - assert "catboost" in problem - assert "not installed" in problem - - -def test_check_reports_outdated(monkeypatch): - _patch_metadata(monkeypatch, {}, {"catboost": "1.0.0"}) - problem = deps._check(Requirement("catboost>=1.2.8,<2.0.0")) - assert problem is not None - assert "installed: 1.0.0" in problem -``` - -- [ ] **Step 4: Run the tests** - -Run: `pytest tests/test_deps.py -q` -Expected: 3 passed. - -- [ ] **Step 5: Lint and type-check** - -Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` -Expected: no errors. - -- [ ] **Step 6: Commit** - -```bash -git add pyproject.toml src/autointent/_deps.py tests/test_deps.py -git commit -m "feat(deps): add _check leaf version validator + packaging core dep (#322)" -``` - ---- - -### Task 2: `_iter_extra_reqs` — read a dist's requirements for one extra - -**Files:** -- Modify: `src/autointent/_deps.py` -- Test: `tests/test_deps.py` - -**Interfaces:** -- Consumes: nothing new. -- Produces: `_iter_extra_reqs(dist: str, extra: str) -> list[Requirement]` — the requirements of `dist` activated by `extra` for the current environment. Base dependencies (no extra marker, or only an environment marker) are excluded. - -- [ ] **Step 1: Write the failing test** - -Add to `tests/test_deps.py`: - -```python -def test_iter_extra_reqs_selects_only_extra_members(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": [ - "numpy>=1.0 ; python_version >= '3.0'", # base dep w/ env marker -> excluded - "torch>=2.0", # base dep, no marker -> excluded - "catboost>=1.2.8,<2.0.0 ; extra == 'catboost'", # extra member -> included - "peft>=0.10.0 ; extra == 'peft'", # different extra -> excluded - ]}, - {}, - ) - reqs = deps._iter_extra_reqs("autointent", "catboost") - assert {r.name for r in reqs} == {"catboost"} -``` - -- [ ] **Step 2: Run it to confirm it fails** - -Run: `pytest tests/test_deps.py::test_iter_extra_reqs_selects_only_extra_members -q` -Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute '_iter_extra_reqs'`. - -- [ ] **Step 3: Implement `_iter_extra_reqs`** - -In `src/autointent/_deps.py`, add `from packaging.utils import canonicalize_name` to the imports (keep the import block sorted: `Requirement` then `canonicalize_name`), and append: - -```python -def _iter_extra_reqs(dist: str, extra: str) -> list[Requirement]: - """Return the requirements of ``dist`` activated by ``extra``. - - A requirement is included only when its marker is satisfied *because* of the - extra: it must evaluate true with the extra set and false with no extra. This - excludes base dependencies that merely carry an environment marker. - - Args: - dist: Distribution name whose metadata is read. - extra: Extra name whose dependencies are wanted. - - Returns: - The parsed requirements activated by ``extra`` in the current environment. - """ - target = str(canonicalize_name(extra)) - result: list[Requirement] = [] - for spec in metadata.requires(dist) or []: - req = Requirement(spec) - marker = req.marker - if marker is None: - continue - if marker.evaluate({"extra": target}) and not marker.evaluate({"extra": ""}): - result.append(req) - return result -``` - -- [ ] **Step 4: Run the tests** - -Run: `pytest tests/test_deps.py -q` -Expected: all passed. - -- [ ] **Step 5: Lint and type-check** - -Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` -Expected: no errors. - -- [ ] **Step 6: Commit** - -```bash -git add src/autointent/_deps.py tests/test_deps.py -git commit -m "feat(deps): resolve a dist's requirements for a single extra" -``` - ---- - -### Task 3: `_resolve` — recursive walk with cycle guard + cached entry point - -**Files:** -- Modify: `src/autointent/_deps.py` -- Test: `tests/test_deps.py` - -**Interfaces:** -- Consumes: `_iter_extra_reqs`. -- Produces: - - `_resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]` — every leaf requirement reachable from `dist[extra]`, recursing through nested extras, cycle-guarded by `seen`. - - `_resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]` — memoized wrapper (call this from `require`). - -- [ ] **Step 1: Write the failing tests + cache-clear fixture** - -First add `import pytest` to the **third-party** import group of `tests/test_deps.py` — it must come *before* `from packaging.requirements import Requirement` (ruff's isort sorts straight `import` statements before `from` imports within a section). Then add the fixture and tests (the autouse fixture keeps the cache from leaking synthetic graphs across tests): - -```python -@pytest.fixture(autouse=True) -def _clear_resolve_cache(): - deps._resolve_cached.cache_clear() - yield - deps._resolve_cached.cache_clear() - - -def test_resolve_recurses_into_nested_extra(monkeypatch): - _patch_metadata( - monkeypatch, - { - "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], - "transformers": [ - "torch>=2.2 ; extra == 'torch'", - "accelerate>=0.26.0 ; extra == 'torch'", - ], - }, - {}, - ) - reqs = deps._resolve("autointent", "transformers", set()) - assert {r.name for r in reqs} == {"transformers", "torch", "accelerate"} - - -def test_resolve_terminates_on_cycle(monkeypatch): - _patch_metadata( - monkeypatch, - {"pkg": [ - "pkg[b]>=1.0 ; extra == 'a'", - "pkg[a]>=1.0 ; extra == 'b'", - ]}, - {}, - ) - reqs = deps._resolve("pkg", "a", set()) - assert {r.name for r in reqs} == {"pkg"} - - -def test_resolve_cached_returns_tuple(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, - {}, - ) - result = deps._resolve_cached("autointent", "catboost") - assert isinstance(result, tuple) - assert {r.name for r in result} == {"catboost"} -``` - -- [ ] **Step 2: Run them to confirm they fail** - -Run: `pytest tests/test_deps.py -k resolve -q` -Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute '_resolve'`. - -- [ ] **Step 3: Implement `_resolve` and `_resolve_cached`** - -In `src/autointent/_deps.py`, add `from functools import cache` to the imports (above `from importlib import metadata`), and append (use `@cache`, not `@lru_cache(maxsize=None)` — ruff `ALL` raises `UP033` for the latter; `functools.cache` exists on py3.10 and still exposes `.cache_clear()`): - -```python -def _resolve(dist: str, extra: str, seen: set[tuple[str, str]]) -> list[Requirement]: - """Recursively collect every leaf requirement activated by ``dist[extra]``. - - Each activated requirement is returned for version checking, and any nested - extras it declares (e.g. ``transformers[torch]``) are resolved in turn. - - Args: - dist: Distribution name to start from. - extra: Extra name to resolve. - seen: Visited ``(dist, extra)`` pairs, used to break dependency cycles. - - Returns: - The flattened list of requirements to validate. - """ - key = (str(canonicalize_name(dist)), str(canonicalize_name(extra))) - if key in seen: - return [] - seen.add(key) - - leaves: list[Requirement] = [] - for req in _iter_extra_reqs(dist, extra): - leaves.append(req) - for nested in req.extras: - leaves.extend(_resolve(req.name, nested, seen)) - return leaves - - -@cache -def _resolve_cached(dist: str, extra: str) -> tuple[Requirement, ...]: - """Memoized :func:`_resolve`; the metadata graph shape is stable per process. - - Args: - dist: Distribution name to start from. - extra: Extra name to resolve. - - Returns: - The resolved requirements as an immutable tuple. - """ - return tuple(_resolve(dist, extra, set())) -``` - -- [ ] **Step 4: Run the tests** - -Run: `pytest tests/test_deps.py -q` -Expected: all passed. - -- [ ] **Step 5: Lint and type-check** - -Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` -Expected: no errors. - -- [ ] **Step 6: Commit** - -```bash -git add src/autointent/_deps.py tests/test_deps.py -git commit -m "feat(deps): recursive extra resolution with cycle guard + cache" -``` - ---- - -### Task 4: `require` — public guard (unknown-extra check, aggregation, message) - -**Files:** -- Modify: `src/autointent/_deps.py` -- Test: `tests/test_deps.py` - -**Interfaces:** -- Consumes: `_resolve_cached`, `_check`. -- Produces: `require(extra: str, *, dist: str = "autointent") -> None`. Raises `ValueError` if `dist` declares no such extra (typo guard); raises `ImportError` listing every missing/outdated dependency with a `pip install '[]'` hint; otherwise returns `None`. This is the symbol every call site imports. - -- [ ] **Step 1: Write the failing tests** - -Add to `tests/test_deps.py`: - -```python -def test_require_passes_when_all_present(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, - {"catboost": "1.5.0"}, - ) - deps.require("catboost") # must not raise - - -def test_require_raises_for_missing_leaf(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, - {}, - ) - with pytest.raises(ImportError) as exc: - deps.require("catboost") - text = str(exc.value) - assert "catboost" in text - assert "not installed" in text - assert "pip install 'autointent[catboost]'" in text - - -def test_require_raises_for_outdated_version(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": ["catboost>=1.2.8,<2.0.0 ; extra == 'catboost'"]}, - {"catboost": "1.0.0"}, - ) - with pytest.raises(ImportError) as exc: - deps.require("catboost") - assert "installed: 1.0.0" in str(exc.value) - - -def test_require_detects_missing_nested_accelerate(monkeypatch): - # Regression for #322: accelerate lives in transformers' own [torch] extra, - # so a transformers-present-but-accelerate-absent env must still be flagged. - _patch_metadata( - monkeypatch, - { - "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"], - "transformers": [ - "torch>=2.2 ; extra == 'torch'", - "accelerate>=0.26.0 ; extra == 'torch'", - ], - }, - {"transformers": "4.49.0", "torch": "2.2.0"}, # accelerate absent - ) - with pytest.raises(ImportError) as exc: - deps.require("transformers") - assert "accelerate" in str(exc.value) - - -def test_require_rejects_unknown_extra(monkeypatch): - _patch_metadata( - monkeypatch, - {"autointent": ["catboost>=1.2.8 ; extra == 'catboost'"]}, - {"catboost": "1.5.0"}, - ) - with pytest.raises(ValueError, match="no extra 'transfomers'"): - deps.require("transfomers") # typo -``` - -- [ ] **Step 2: Run them to confirm they fail** - -Run: `pytest tests/test_deps.py -k require -q` -Expected: FAIL with `AttributeError: module 'autointent._deps' has no attribute 'require'`. - -- [ ] **Step 3: Implement `_provides_extras` and `require`** - -In `src/autointent/_deps.py`, append: - -```python -def _provides_extras(dist: str) -> set[str]: - """Return the normalized set of extras declared by ``dist``. - - Args: - dist: Distribution name whose metadata is read. - - Returns: - Normalized extra names from the distribution's ``Provides-Extra`` metadata. - """ - md = metadata.metadata(dist) - return {str(canonicalize_name(e)) for e in (md.get_all("Provides-Extra") or [])} - - -def require(extra: str, *, dist: str = _DIST) -> None: - """Ensure every dependency of an ``autointent`` extra is installed and current. - - Args: - extra: The extra to validate, e.g. ``"transformers"``. - dist: Distribution that declares the extra. Defaults to ``"autointent"``. - - Raises: - ValueError: If ``dist`` declares no such ``extra`` (typically a typo). - ImportError: If any required dependency is missing or its installed version - does not satisfy the constraint declared in the metadata. - """ - known = _provides_extras(dist) - if str(canonicalize_name(extra)) not in known: - msg = f"'{dist}' declares no extra '{extra}'. Known extras: {', '.join(sorted(known))}." - raise ValueError(msg) - - problems: list[str] = [] - for req in _resolve_cached(dist, extra): - problem = _check(req) - if problem is not None and problem not in problems: - problems.append(problem) - - if problems: - bullets = "\n".join(f" - {p}" for p in problems) - msg = ( - f"Feature requires extra '{extra}', but dependencies are missing or outdated:\n" - f"{bullets}\n" - f"Install with: pip install '{dist}[{extra}]'" - ) - raise ImportError(msg) -``` - -- [ ] **Step 4: Run the full module test file** - -Run: `pytest tests/test_deps.py -q` -Expected: all passed. - -- [ ] **Step 5: Lint and type-check** - -Run: `ruff check src/autointent/_deps.py tests/test_deps.py && mypy src/autointent/_deps.py` -Expected: no errors. - -- [ ] **Step 6: Commit** - -```bash -git add src/autointent/_deps.py tests/test_deps.py -git commit -m "feat(deps): add require(extra) guard with aggregated, versioned errors (#322)" -``` - ---- - -### Task 5: Migrate all call sites + remove old `require` from `_utils.py` - -**Files:** -- Modify: `src/autointent/_utils.py` (remove old `require` + now-unused `import importlib`) -- Modify: `user_guides/advanced/02_embedder_configuration.py` (stale prose reference to `autointent._utils.require`) -- Modify (import line `from autointent._utils import require` → `from autointent._deps import require`, and each `require(...)` call): - - `src/autointent/modules/scoring/_bert.py` - - `src/autointent/modules/scoring/_ptuning/ptuning.py` - - `src/autointent/modules/scoring/_lora/lora.py` - - `src/autointent/modules/scoring/_catboost/catboost_scorer.py` - - `src/autointent/_wrappers/embedder/vllm.py` - - `src/autointent/_wrappers/embedder/openai.py` - - `src/autointent/_wrappers/embedder/sentence_transformers.py` - - `src/autointent/_wrappers/ranker.py` - - `src/autointent/generation/_generator.py` - - `src/autointent/_dump_tools/unit_dumpers.py` - -**Interfaces:** -- Consumes: `require(extra: str)` from `autointent._deps` (Task 4). -- Produces: no new symbols. After this task no module imports `require` from `autointent._utils`, and no `require(` call passes more than one argument. - -- [ ] **Step 1: Remove the old `require` and its import from `_utils.py`** - -In `src/autointent/_utils.py`: delete the entire `require` function (the `def require(...)` block and its body) and delete the top-level `import importlib` line (it was only used by `require`). Leave `import torch`, `from typing import TypeVar`, `_funcs_to_dict`, and `detect_device` untouched. - -- [ ] **Step 2: Update every import line** - -In each of the 10 files listed above, change: - -```python -from autointent._utils import require -``` - -to: - -```python -from autointent._deps import require -``` - -- [ ] **Step 3: Update the call sites (one argument = the extra name)** - -Apply these exact replacements: - -- `src/autointent/modules/scoring/_bert.py`: `require("transformers", "transformers")` → `require("transformers")` -- `src/autointent/modules/scoring/_ptuning/ptuning.py`: `require("peft", extra="peft")` → `require("peft")` -- `src/autointent/modules/scoring/_lora/lora.py`: `require("peft", extra="peft")` → `require("peft")` -- `src/autointent/modules/scoring/_catboost/catboost_scorer.py`: `require("catboost", extra="catboost")` → `require("catboost")` -- `src/autointent/_wrappers/embedder/vllm.py`: `require("vllm", extra="vllm")` → `require("vllm")` -- `src/autointent/_wrappers/embedder/openai.py`: `require("tiktoken", "openai")` → `require("openai")` **and** `require("openai", "openai")` → `require("openai")` -- `src/autointent/generation/_generator.py`: `require("openai", "openai")` → `require("openai")` -- `src/autointent/_wrappers/ranker.py`: `require("sentence_transformers", extra="sentence-transformers")` → `require("sentence-transformers")` -- `src/autointent/_wrappers/embedder/sentence_transformers.py`: - - line ~45: `require("transformers", extra="transformers")` → `require("transformers")` - - line ~133: `require("sentence_transformers", extra="sentence-transformers")` → `require("sentence-transformers")` - - the training block (three consecutive calls): - ```python - require("sentence_transformers", extra="sentence-transformers") - require("transformers", extra="transformers") - require("accelerate", extra="transformers") - ``` - becomes (drop the `accelerate` line — it is now covered by recursing into the `transformers` extra): - ```python - require("sentence-transformers") - require("transformers") - ``` -- `src/autointent/_dump_tools/unit_dumpers.py` — replace all occurrences: - - `require("peft", extra="peft")` → `require("peft")` - - `require("transformers", extra="transformers")` → `require("transformers")` - - `require("catboost", extra="catboost")` → `require("catboost")` - -Then fix the stale prose reference in `user_guides/advanced/02_embedder_configuration.py` (line ~22): change `autointent._utils.require` to `autointent._deps.require` (it is documentation prose, not an import — no lint impact, just keeping the docs accurate). - -- [ ] **Step 4: Verify no stale call shape or import remains** - -Run: -```bash -grep -rn 'from autointent._utils import require' src/autointent --include='*.py'; \ -grep -rnE 'require\([^)]*,' src/autointent --include='*.py' | grep -v 'def require' -``` -Expected: **no output** from either grep (no old import; no `require(` call with a second argument). - -- [ ] **Step 5: Lint and type-check the whole package** - -Run: `ruff check src/autointent && mypy src/autointent` -Expected: no errors. - -- [ ] **Step 6: Commit** - -```bash -git add src/autointent -git commit -m "refactor(deps): migrate all require() call sites to extra-name guard (#322)" -``` - ---- - -### Task 6: Real-metadata smoke test + push to CI - -**Files:** -- Modify: `tests/test_deps.py` (add one un-monkeypatched wiring test) - -**Interfaces:** -- Consumes: `_resolve_cached` against the real installed `autointent` metadata. -- Produces: no new symbols. - -- [ ] **Step 1: Add a real-metadata wiring test** - -Add to `tests/test_deps.py` (no `monkeypatch` — exercises the real `importlib.metadata` reads to catch wiring regressions; asserts only resolution shape, not install state, so it is deterministic regardless of which extras CI installs): - -```python -def test_resolve_reads_real_autointent_metadata(): - reqs = deps._resolve_cached("autointent", "catboost") - assert any(r.name == "catboost" for r in reqs) -``` - -- [ ] **Step 2: Run the isolated test file** - -Run: `pytest tests/test_deps.py -q` -Expected: all passed. - -- [ ] **Step 3: Lint and type-check** - -Run: `ruff check src/autointent tests/test_deps.py && mypy src/autointent` -Expected: no errors. - -- [ ] **Step 4: Commit** - -```bash -git add tests/test_deps.py -git commit -m "test(deps): smoke-test require resolution against real metadata" -``` - -- [ ] **Step 5: Push the branch and trigger CI** - -Push the working branch and open a PR (or trigger the coverage dispatch) so the **full** suite — including the migrated call-site modules — runs in CI. Do not run the full suite locally. Confirm CI is green before considering the work done; link the run in the PR referencing #322. - ---- - -## Notes / deviations from the spec - -- **No re-export from `_utils`.** The spec proposed re-exporting `require` from `autointent._utils` to keep import lines untouched. Under `ruff select = ["ALL"]` a bare re-export trips `F401`, and the redundant-alias workaround trips `PLC0414`. Since every call-site file is edited for the signature change anyway, updating its import line to `from autointent._deps import require` is cleaner and lint-clean. The dependency logic now has a single home (`_deps.py`). -- **Cycle-guard key type.** The spec sketched `seen: set[tuple[str, frozenset[str]]]`. The plan uses `set[tuple[str, str]]` because `_resolve` recurses one nested-extra name at a time (it iterates `req.extras` and recurses per name), so the key is `(dist, single-extra-name)`. Behaviourally equivalent for cycle-breaking; simpler. -- **Error-bullet ordering.** Problems are listed in resolution order (e.g. `transformers` before its nested `accelerate`), which is the reverse of the illustrative example in the spec. Cosmetic — no test asserts bullet order. - -## Self-review - -- **Spec coverage:** new `_deps.py` (Tasks 1–4); metadata-not-pyproject premise (module docstring, Task 1); metadata version check (`_check`, Task 1); recursive nested-extra resolution incl. accelerate/#322 (Task 3 + Task 4 regression test); aggregated actionable error (Task 4); `packaging` promoted to core dep (Task 1); all ~20 call sites migrated + clean replace (Task 5); synthetic-metadata TDD for every scenario incl. cycle/unknown-extra (Tasks 1–4); real-metadata smoke test (Task 6); CI verification per project convention (Task 6, Global Constraints). The one spec deviation (re-export) is documented above. -- **Placeholder scan:** none — every code/test step contains complete code; every command has expected output. -- **Type consistency:** `_check`, `_iter_extra_reqs`, `_resolve`, `_resolve_cached`, `_provides_extras`, `require` keep identical signatures across the tasks that define and call them; `_resolve_cached` (not `_resolve`) is the symbol `require` consumes, matching Task 3's "Produces" and Task 4's "Consumes". diff --git a/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md b/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md deleted file mode 100644 index 156225905..000000000 --- a/docs/superpowers/specs/2026-06-22-require-extra-resolver-design.md +++ /dev/null @@ -1,170 +0,0 @@ -# Design: metadata-driven `require(extra)` dependency guard - -**Date:** 2026-06-22 -**Issue:** [#322](https://github.com/voorhs/AutoIntent/issues/322) — bert scorer: opaque ImportError when `accelerate` is missing (require() guard checks only `transformers`) -**Status:** Approved, pending implementation plan - -## Problem - -The current `require(dependency, extra=None)` utility (`src/autointent/_utils.py`) tries to -`importlib.import_module(dependency)` and raises an informative `ImportError` naming the pip -extra if the import fails. Two structural weaknesses: - -1. **Manual sync burden.** Each `require` call site hard-codes a module name and an extra label. - These must be kept in sync with `pyproject.toml` by hand. When an extra gains a dependency, - every relevant call site must be updated, and it is easy to miss one. -2. **No version checking.** It only checks *presence* (importability), never whether the installed - version satisfies the constraint declared in `pyproject.toml`. - -Issue #322 is the concrete failure: `BertScorer.__init__` calls `require("transformers", "transformers")`, -which passes whenever `transformers` is importable — even when `accelerate` (pulled by the -`transformers[torch]` extra, required by HF `Trainer`) is absent. The user then hits a raw, deep -`ImportError` from inside `Trainer` instead of AutoIntent's "install `autointent[transformers]`" message. - -## Goal - -Refactor `require` to take an **autointent extra name** and validate that **every** dependency of -that extra — recursively, including nested third-party extras such as `transformers[torch] → accelerate` -— is installed **and** version-satisfied, raising a single aggregated, actionable `ImportError` -otherwise. - -This removes the manual sync burden (single source of truth = the package metadata baked from -`pyproject.toml`), adds version checking, and fixes #322 structurally — with no hand-added -`accelerate` guard. - -## Key technical premise - -`pyproject.toml` is **not** shipped in the wheel and cannot be parsed at runtime. It does not need to -be: the build process bakes the same requirements (with version specifiers and `extra` markers) into -the installed package metadata, which `importlib.metadata` reads at runtime. Verified against the -installed `autointent`: - -``` -transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers' -``` - -and, recursing into `transformers`' own metadata: - -``` -torch>=2.2 ; extra == "torch" -accelerate>=0.26.0 ; extra == "torch" -``` - -So the single source of truth is the installed metadata, available everywhere the package is installed. - -## Decisions (from brainstorming Q&A) - -- **Presence check method:** metadata version check via `importlib.metadata` (not import-based). Faster, - no heavy imports (e.g. torch), enables version checking and recursive extra resolution. -- **Resolution depth:** full recursive walk of nested extras, cycle-guarded. This is what catches the - nested `accelerate` requirement automatically. -- **Rollout:** clean replace — change the signature to `require(extra: str)` and migrate every call - site in this PR. `require` is internal (`autointent._utils`, ~20 call sites, no external contract). - -## Design - -### New module `src/autointent/_deps.py` - -Houses the dependency-resolution logic, keeping it out of `_utils.py` (which imports `torch` at module -load) and making it trivially unit-testable. - -- **`require(extra: str, *, dist: str = "autointent") -> None`** — public entry point. Resolves the - extra's full (recursive) leaf-requirement set and validates each. Raises `ImportError` with an - aggregated message if anything is missing or outdated; returns `None` otherwise. - -- **`_iter_extra_reqs(dist, extra)`** — read `importlib.metadata.requires(dist)` (treat `None` as - empty), parse each entry with `packaging.requirements.Requirement`, and keep those whose `.marker` - evaluates true for `{"extra": extra}` combined with the current environment. `packaging` fills - environment markers (`python_version`, `sys_platform`, …) from the running interpreter, so - platform-conditional dependencies resolve correctly. - -- **`_resolve(dist, extra, seen)`** — yields leaf requirements (those with no further extras) reachable - from `dist`'s `extra`, recursing into nested extras. For a requirement like - `transformers[torch]>=4.49.0,<5.0.0` it yields the `transformers` constraint itself and then recurses - into `transformers`' `torch` extra. Cycle-guarded via `seen: set[tuple[str, frozenset[str]]]`. - -- **`_check(req) -> str | None`** — `importlib.metadata.version(req.name)`; return a human-readable - problem string if the dist is absent (`PackageNotFoundError`) or the installed version is not in - `req.specifier`; otherwise `None`. - -- Distribution/extra names normalized with `packaging.utils.canonicalize_name` on both sides for - hyphen/underscore robustness. - -- `_utils.py` re-exports `require` (`from autointent._deps import require`) so existing - `from autointent._utils import require` import lines stay untouched. - -- **Caching:** memoize the pure resolution step (extra → tuple of leaf requirements) with - `functools.lru_cache` keyed by `(dist, extra)`, so hot module-init paths do not re-walk metadata. - Version checks run on each call (cheap). - -### The #322 mechanism, concretely - -`transformers[torch]>=4.49.0,<5.0.0` parses to `name="transformers"`, `extras={"torch"}`, plus the -specifier. `_resolve` checks the `transformers` version against the specifier, then recurses via -`_iter_extra_reqs("transformers", "torch")` → `accelerate>=0.26.0`, `torch>=2.2`. A missing `accelerate` -surfaces here, named explicitly, with the `autointent[transformers]` install hint. - -### Error shape (aggregated) - -``` -ImportError: Feature requires extra 'transformers', but dependencies are missing or outdated: - - accelerate>=0.26.0 (not installed) - - transformers>=4.49.0,<5.0.0 (installed: 4.30.0) -Install with: pip install 'autointent[transformers]' -``` - -### pyproject change - -Add `packaging` to **core** `dependencies` (it is currently present only transitively — promoting it to -a declared dependency avoids repeating the exact transitive-reliance mistake behind #322). Conservative -floor, e.g. `packaging (>=23.2)`; the `Requirement` / `SpecifierSet` / `canonicalize_name` / -`Marker.evaluate` APIs used here have been stable for years. The exact floor will be confirmed during -implementation. - -### Call-site migration - -All ~20 sites collapse to passing just the extra name. Each is mapped precisely during implementation, -preserving current intent. Representative cases: - -| File | Before | After | -| --- | --- | --- | -| `modules/scoring/_bert.py` | `require("transformers", "transformers")` | `require("transformers")` | -| `modules/scoring/_ptuning/ptuning.py`, `_lora/lora.py` | `require("peft", extra="peft")` | `require("peft")` | -| `modules/scoring/_catboost/catboost_scorer.py` | `require("catboost", extra="catboost")` | `require("catboost")` | -| `_wrappers/embedder/vllm.py` | `require("vllm", extra="vllm")` | `require("vllm")` | -| `_wrappers/embedder/openai.py` | `require("tiktoken","openai")` + `require("openai","openai")` | `require("openai")` | -| `generation/_generator.py` | `require("openai","openai")` | `require("openai")` | -| `_wrappers/ranker.py` | `require("sentence_transformers", extra="sentence-transformers")` | `require("sentence-transformers")` | -| `_wrappers/embedder/sentence_transformers.py` | `require("transformers")` + `require("accelerate")` + `require("sentence_transformers")` | collapse (accelerate covered by recursion) | -| `_dump_tools/unit_dumpers.py` | peft / transformers / catboost requires | `require("peft")` / `require("transformers")` / `require("catboost")` | - -### Testing (TDD, synthetic metadata) - -Unit tests inject a fake requirement graph (monkeypatch `_iter_extra_reqs` or the underlying -`importlib.metadata.requires` / `version`) so assertions do not depend on what CI happens to install: - -- all dependencies present and version-satisfied → no raise -- a leaf dependency missing → `ImportError` naming the dist and the extra + install hint -- installed version too old → `ImportError` showing installed vs required -- **nested-extra dependency missing (the `accelerate` case) → `ImportError` names `accelerate`** (proves #322) -- cyclic extras → resolution terminates -- unknown extra name → clear error - -Plus one real-metadata smoke test against a known-present extra to catch wiring regressions. - -Per project convention, tests are verified via CI (push branch + coverage dispatch), not run locally. - -## Out of scope - -- Changing what each module actually imports at runtime. -- Resolving full transitive *non-extra* dependency trees — we only walk declared extras, which is where - the relevant guards belong. -- Source checkouts with no installed metadata (dev installs always have `.dist-info`/PEP 660 metadata). - -## Rejected alternatives - -- **Import-based presence check** — slower, drags in heavy imports (torch), and needs an - import-name ↔ dist-name map. -- **One-level resolution (autointent extras only)** — would not catch `accelerate`, since it is nested - inside `transformers`' `torch` extra; #322 would still need a manual guard. -- **Hand-maintained extra → deps map** — still a sync burden, just centralized. From 5aebe1990671ad3793720354ee9ca14429dbd86e Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 23 Jun 2026 23:27:59 +0300 Subject: [PATCH 16/16] remove optional `dist` argument from `require` --- src/autointent/_deps.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/autointent/_deps.py b/src/autointent/_deps.py index c3661b049..f56b2a536 100644 --- a/src/autointent/_deps.py +++ b/src/autointent/_deps.py @@ -169,25 +169,24 @@ def _provides_extras(dist: str) -> set[str]: return {str(canonicalize_name(e)) for e in (md.get_all("Provides-Extra") or [])} -def require(extra: Extra, *, dist: str = _DIST) -> None: +def require(extra: Extra) -> None: """Ensure every dependency of an ``autointent`` extra is installed and current. Args: extra: The extra to validate, e.g. ``"transformers"``. - dist: Distribution that declares the extra. Defaults to ``"autointent"``. Raises: - ValueError: If ``dist`` declares no such ``extra`` (typically a typo). + ValueError: If ``autointent`` declares no such ``extra`` (typically a typo). ImportError: If any required dependency is missing or its installed version does not satisfy the constraint declared in the metadata. """ - known = _provides_extras(dist) + known = _provides_extras(_DIST) if str(canonicalize_name(extra)) not in known: - msg = f"'{dist}' declares no extra '{extra}'. Known extras: {', '.join(sorted(known))}." + msg = f"'{_DIST}' declares no extra '{extra}'. Known extras: {', '.join(sorted(known))}." raise ValueError(msg) problems: list[str] = [] - for req in _resolve_cached(dist, extra): + for req in _resolve_cached(_DIST, extra): problem = _check(req) if problem is not None and problem not in problems: problems.append(problem) @@ -197,6 +196,6 @@ def require(extra: Extra, *, dist: str = _DIST) -> None: msg = ( f"Feature requires extra '{extra}', but dependencies are missing or outdated:\n" f"{bullets}\n" - f"Install with: pip install '{dist}[{extra}]'" + f"Install with: pip install '{_DIST}[{extra}]'" ) raise ImportError(msg)