From 2b5ea8f8477caedaf69a8a6820928978471437ed Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sat, 27 Jun 2026 22:42:24 +0300
Subject: [PATCH 1/7] feat(deps): upgrade transformers to 5.x and
 sentence-transformers to 5.2+ (#295)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 4.57.x mistral-regex codepath called `huggingface_hub.model_info()` on
every tokenizer load with vocab >100k (e.g. `intfloat/multilingual-e5-*`),
hammering HF's rate limit in CI and in production. transformers 5.0+ caches
that probe per-process and respects `local_files_only`/`HF_HUB_OFFLINE`.

The bump is necessarily a coordinated two-package migration: ST 5.2.0 is
the first release that lifts the `transformers<5.0.0` cap. Resolved
versions: transformers 5.12.1, sentence-transformers 5.6.0.

Adjusts the v5.x surfaces that actually broke:

- ranker.py: `cross_encoder.model.classifier` → `cross_encoder[0].auto_model.classifier`
  (ST 5 restructured CrossEncoder into a nn.Sequential of modules).
- ranker.py: CrossEncoder.predict() renamed `activation_fct` → `activation_fn`.
- ranker.py: `cross_encoder.model.cpu()` → `cross_encoder.cpu()` (the wrapper
  is itself an nn.Module now, no underlying `.model` attribute).
- embedder/sentence_transformers.py: import `losses`/`training_args` from
  `sentence_transformers.sentence_transformer` (top-level path deprecated).
- embedder/sentence_transformers.py: `warmup_ratio=` → `warmup_steps=` (v5
  TrainingArguments accepts a float <1.0 there as a ratio).
- test_sentence_transformers_backend.py: `get_sentence_embedding_dimension()`
  → `get_embedding_dimension()`.

Removes the `_disable_transformers_mistral_regex_patch` workaround from
tests/conftest.py — the underlying bug is fixed in v5.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pyproject.toml                                |  4 +--
 .../embedder/sentence_transformers.py         |  7 ++--
 src/autointent/_wrappers/ranker.py            | 15 +++++++--
 tests/conftest.py                             | 33 -------------------
 .../test_sentence_transformers_backend.py     |  2 +-
 5 files changed, 19 insertions(+), 42 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5fbabaf86..0f85942c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,8 +51,8 @@ dependencies = [
 [project.optional-dependencies]
 catboost = ["catboost (>=1.2.8,<2.0.0)"]
 peft = ["peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)"]
-transformers = ["transformers[torch] (>=4.49.0,<5.0.0)"]
-sentence-transformers = ["sentence-transformers (>=3,<4)"]
+transformers = ["transformers[torch] (>=5.0.0,<6.0.0)"]
+sentence-transformers = ["sentence-transformers (>=5.2.0,<6.0.0)"]
 dspy = [
     "dspy (>=2.6.5,<3.0.0)",
 ]
diff --git a/src/autointent/_wrappers/embedder/sentence_transformers.py b/src/autointent/_wrappers/embedder/sentence_transformers.py
index 772737fed..a4b2335df 100644
--- a/src/autointent/_wrappers/embedder/sentence_transformers.py
+++ b/src/autointent/_wrappers/embedder/sentence_transformers.py
@@ -299,9 +299,8 @@ def train(self, utterances: list[str], labels: ListOfLabels, config: EmbedderFin
         from sentence_transformers import (
             SentenceTransformerTrainer,
             SentenceTransformerTrainingArguments,
-            losses,
-            training_args,
         )
+        from sentence_transformers.sentence_transformer import losses, training_args
         from transformers import EarlyStoppingCallback
 
         x_train, x_val, y_train, y_val = train_test_split(
@@ -324,7 +323,9 @@ def train(self, utterances: list[str], labels: ListOfLabels, config: EmbedderFin
                 per_device_train_batch_size=config.batch_size,
                 per_device_eval_batch_size=config.batch_size,
                 learning_rate=config.learning_rate,
-                warmup_ratio=config.warmup_ratio,
+                # transformers v5 deprecated `warmup_ratio` in favor of `warmup_steps`,
+                # which now accepts a float < 1.0 as a fraction of total training steps.
+                warmup_steps=config.warmup_ratio,
                 fp16=config.fp16,
                 bf16=config.bf16,
                 seed=config.seed,
diff --git a/src/autointent/_wrappers/ranker.py b/src/autointent/_wrappers/ranker.py
index 8e73f0be5..9aad7f27b 100644
--- a/src/autointent/_wrappers/ranker.py
+++ b/src/autointent/_wrappers/ranker.py
@@ -136,7 +136,13 @@ def __init__(
         if classifier_head is not None or self.config.train_head:
             self._train_head = True
             self._activations_list: list[npt.NDArray[Any]] = []
-            self._hook_handler = self.cross_encoder.model.classifier.register_forward_hook(self._classifier_hook)
+            # sentence-transformers v5 restructured CrossEncoder into a nn.Sequential
+            # of modules: cross_encoder[0] is a Transformer wrapping the underlying
+            # AutoModelForSequenceClassification (exposed as .auto_model). The
+            # classifier head still lives on that HF model.
+            self._hook_handler = self.cross_encoder[0].auto_model.classifier.register_forward_hook(
+                self._classifier_hook
+            )
 
     def _classifier_hook(self, _module, input_tensor, _output_tensor) -> None:  # type: ignore[no-untyped-def] # noqa: ANN001
         """Hook to capture classifier activations.
@@ -163,7 +169,7 @@ def _get_features_or_predictions(self, pairs: list[tuple[str, str]]) -> npt.NDAr
                 self.cross_encoder.predict(
                     pairs,
                     batch_size=self.config.batch_size,
-                    activation_fct=nn.Sigmoid() if self.output_range == "sigmoid" else nn.Tanh(),
+                    activation_fn=nn.Sigmoid() if self.output_range == "sigmoid" else nn.Tanh(),
                 )
             )
 
@@ -311,7 +317,10 @@ def load(cls, path: Path, override_config: CrossEncoderConfig | None = None) ->
 
     def clear_ram(self) -> None:
         """Clear model from RAM and GPU memory."""
-        self.cross_encoder.model.cpu()
+        # sentence-transformers v5 CrossEncoder is itself a nn.Sequential, so we
+        # call .cpu() on the wrapper directly instead of the (now-absent)
+        # underlying `.model` attribute.
+        self.cross_encoder.cpu()
         del self.cross_encoder
         gc.collect()
         torch.cuda.empty_cache()
diff --git a/tests/conftest.py b/tests/conftest.py
index 0845a3e40..397fd2142 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -25,39 +25,6 @@
     from autointent.nodes import NodeOptimizer
 
 
-def _disable_transformers_mistral_regex_patch() -> None:
-    # transformers.PreTrainedTokenizerBase._patch_mistral_regex calls
-    # huggingface_hub.model_info() for every tokenizer load with vocab > 100k
-    # (e.g. XLM-RoBERTa-based models like intfloat/multilingual-e5-*). On CI
-    # that uncacheable API call hammers the HF rate limit (429s). Tests never
-    # load mistralai tokenizers, so the correction is pure overhead — replace
-    # it with a no-op for the whole test session.
-    #
-    # Upstream bug & fix (merged for transformers 5.0.0+, NOT backported to 4.x):
-    #   https://github.com/huggingface/transformers/issues/44843
-    #   https://github.com/huggingface/transformers/pull/45444
-    # Drop this workaround when we upgrade to transformers>=5.0:
-    #   https://github.com/deeppavlov/AutoIntent/issues/295
-    try:
-        from transformers import tokenization_utils_base
-    except ImportError:
-        return
-
-    base = getattr(tokenization_utils_base, "PreTrainedTokenizerBase", None)
-    if base is None or not hasattr(base, "_patch_mistral_regex"):
-        return
-
-    def _noop_patch_mistral_regex(  # type: ignore[no-untyped-def]  # reason: monkey-patched into transformers internal classmethod; transformers is in ignore_missing_imports so signature types are unavailable
-        cls, tokenizer, *args, **kwargs
-    ):
-        return tokenizer
-
-    base._patch_mistral_regex = classmethod(_noop_patch_mistral_regex)
-
-
-_disable_transformers_mistral_regex_patch()
-
-
 def get_dataset_path() -> Path:
     return cast("Path", ires.files("tests.assets.data").joinpath("clinc_subset.json"))
 
diff --git a/tests/embedder/test_sentence_transformers_backend.py b/tests/embedder/test_sentence_transformers_backend.py
index b0c72a800..348ad4165 100644
--- a/tests/embedder/test_sentence_transformers_backend.py
+++ b/tests/embedder/test_sentence_transformers_backend.py
@@ -43,7 +43,7 @@ def test_model_lazy_loading(self, st_backend: SentenceTransformerEmbeddingBacken
         # cannot see the mutation inside `.embed()`. The post-call assert is
         # the whole point of this test (lazy load: None -> non-None).
         assert st_backend._model is not None
-        assert embeddings.shape == (1, st_backend._model.get_sentence_embedding_dimension())  # type: ignore[unreachable]
+        assert embeddings.shape == (1, st_backend._model.get_embedding_dimension())  # type: ignore[unreachable]
 
     def test_clear_ram(self, st_backend: SentenceTransformerEmbeddingBackend) -> None:
         """Test clearing model from RAM."""

From 14f9576cb4556bc4c08e2877acacd9aeb85af5b1 Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sat, 27 Jun 2026 23:00:41 +0300
Subject: [PATCH 2/7] fix(deps): address review findings on transformers v5
 migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Bump sentence-transformers lower bound 5.2.0 → 5.4.0. The new ranker /
  embedder paths (cross_encoder[0] subscript, the
  sentence_transformers.sentence_transformer subpackage,
  get_embedding_dimension) all landed in 5.4.0; under the previous floor,
  anyone resolving 5.2.x–5.3.x would hit ModuleNotFoundError /
  AttributeError.
- Constrain EmbedderFineTuningConfig.warmup_ratio to (0, 1). v5
  TrainingArguments interprets warmup_steps>=1 as a raw step count
  and <1 as a fraction, so a stray warmup_ratio=1.0 would silently
  produce one warmup step instead of full-training warmup.
- Refresh tests/test_deps.py synthetic metadata fixtures to v5
  version strings so the resolver tests exercise the version range
  we ship, not the v4 range we just left behind.
- Trim the v4→v5 narrating comments down to the WHY of the current
  code; per-line migration history belongs in the commit log.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pyproject.toml                                           | 2 +-
 .../_wrappers/embedder/sentence_transformers.py          | 3 +--
 src/autointent/_wrappers/ranker.py                       | 9 ++-------
 src/autointent/configs/_transformers.py                  | 5 ++++-
 tests/test_deps.py                                       | 8 ++++----
 5 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0f85942c9..a94e05693 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,7 +52,7 @@ dependencies = [
 catboost = ["catboost (>=1.2.8,<2.0.0)"]
 peft = ["peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)"]
 transformers = ["transformers[torch] (>=5.0.0,<6.0.0)"]
-sentence-transformers = ["sentence-transformers (>=5.2.0,<6.0.0)"]
+sentence-transformers = ["sentence-transformers (>=5.4.0,<6.0.0)"]
 dspy = [
     "dspy (>=2.6.5,<3.0.0)",
 ]
diff --git a/src/autointent/_wrappers/embedder/sentence_transformers.py b/src/autointent/_wrappers/embedder/sentence_transformers.py
index a4b2335df..70920b364 100644
--- a/src/autointent/_wrappers/embedder/sentence_transformers.py
+++ b/src/autointent/_wrappers/embedder/sentence_transformers.py
@@ -323,8 +323,7 @@ def train(self, utterances: list[str], labels: ListOfLabels, config: EmbedderFin
                 per_device_train_batch_size=config.batch_size,
                 per_device_eval_batch_size=config.batch_size,
                 learning_rate=config.learning_rate,
-                # transformers v5 deprecated `warmup_ratio` in favor of `warmup_steps`,
-                # which now accepts a float < 1.0 as a fraction of total training steps.
+                # warmup_steps accepts a float < 1 as a fraction of total steps.
                 warmup_steps=config.warmup_ratio,
                 fp16=config.fp16,
                 bf16=config.bf16,
diff --git a/src/autointent/_wrappers/ranker.py b/src/autointent/_wrappers/ranker.py
index 9aad7f27b..cf07e9406 100644
--- a/src/autointent/_wrappers/ranker.py
+++ b/src/autointent/_wrappers/ranker.py
@@ -136,10 +136,8 @@ def __init__(
         if classifier_head is not None or self.config.train_head:
             self._train_head = True
             self._activations_list: list[npt.NDArray[Any]] = []
-            # sentence-transformers v5 restructured CrossEncoder into a nn.Sequential
-            # of modules: cross_encoder[0] is a Transformer wrapping the underlying
-            # AutoModelForSequenceClassification (exposed as .auto_model). The
-            # classifier head still lives on that HF model.
+            # CrossEncoder is a nn.Sequential of modules; [0] is the Transformer
+            # wrapping the HF model exposed as .auto_model.
             self._hook_handler = self.cross_encoder[0].auto_model.classifier.register_forward_hook(
                 self._classifier_hook
             )
@@ -317,9 +315,6 @@ def load(cls, path: Path, override_config: CrossEncoderConfig | None = None) ->
 
     def clear_ram(self) -> None:
         """Clear model from RAM and GPU memory."""
-        # sentence-transformers v5 CrossEncoder is itself a nn.Sequential, so we
-        # call .cpu() on the wrapper directly instead of the (now-absent)
-        # underlying `.model` attribute.
         self.cross_encoder.cpu()
         del self.cross_encoder
         gc.collect()
diff --git a/src/autointent/configs/_transformers.py b/src/autointent/configs/_transformers.py
index 2922eb81f..55a4a88e7 100644
--- a/src/autointent/configs/_transformers.py
+++ b/src/autointent/configs/_transformers.py
@@ -28,7 +28,10 @@ class EmbedderFineTuningConfig(BaseModel):
     batch_size: int
     margin: float = Field(default=0.5)
     learning_rate: float = Field(default=2e-5)
-    warmup_ratio: float = Field(default=0.1)
+    # Fed to TrainingArguments.warmup_steps in v5, which interprets float<1 as
+    # a fraction of total steps and float>=1 as a raw step count. Restrict to
+    # (0, 1) so warmup_ratio=1.0 doesn't silently become a single step.
+    warmup_ratio: float = Field(default=0.1, gt=0, lt=1)
     early_stopping_patience: int = Field(default=1)
     early_stopping_threshold: float = Field(default=0.0)
     val_fraction: float = Field(default=0.2)
diff --git a/tests/test_deps.py b/tests/test_deps.py
index a5c74a716..862fbac15 100644
--- a/tests/test_deps.py
+++ b/tests/test_deps.py
@@ -104,7 +104,7 @@ def test_resolve_recurses_into_nested_extra(monkeypatch: pytest.MonkeyPatch) ->
     _patch_metadata(
         monkeypatch,
         {
-            "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"],
+            "autointent": ["transformers[torch]>=5.0.0,<6.0.0 ; extra == 'transformers'"],
             "transformers": [
                 "torch>=2.2 ; extra == 'torch'",
                 "accelerate>=0.26.0 ; extra == 'torch'",
@@ -180,13 +180,13 @@ def test_require_detects_missing_nested_accelerate(monkeypatch: pytest.MonkeyPat
     _patch_metadata(
         monkeypatch,
         {
-            "autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"],
+            "autointent": ["transformers[torch]>=5.0.0,<6.0.0 ; extra == 'transformers'"],
             "transformers": [
                 "torch>=2.2 ; extra == 'torch'",
                 "accelerate>=0.26.0 ; extra == 'torch'",
             ],
         },
-        {"transformers": "4.49.0", "torch": "2.2.0"},  # accelerate absent
+        {"transformers": "5.0.0", "torch": "2.2.0"},  # accelerate absent
     )
     with pytest.raises(ImportError) as exc:
         deps.require("transformers")
@@ -200,7 +200,7 @@ def test_require_reports_extra_package_entirely_missing(monkeypatch: pytest.Monk
     # `transformers[torch]` requirement is flagged as missing with the install hint.
     _patch_metadata(
         monkeypatch,
-        {"autointent": ["transformers[torch]>=4.49.0,<5.0.0 ; extra == 'transformers'"]},
+        {"autointent": ["transformers[torch]>=5.0.0,<6.0.0 ; extra == 'transformers'"]},
         {},  # transformers (and everything else) absent
     )
     with pytest.raises(ImportError) as exc:

From b94a27e88e61d58cafa860c738ec7877df7d0dee Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sat, 27 Jun 2026 23:05:30 +0300
Subject: [PATCH 3/7] fix: relax warmup_ratio lower bound and regenerate schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewer flagged that `gt=0` rejects the legal `warmup_ratio=0.0` config
(disable warmup). Relax to `ge=0`; `lt=1` is kept because that's the
v5 boundary where warmup_steps flips from ratio to raw step count.

Regenerate the published JSON schema so it reflects the constraint —
otherwise YAML authoring against the schema would pass schema
validation and fail at runtime.

Pushed back on the reviewer's claim that `warmup_steps=0.1` runs zero
warmup: transformers v5 types `warmup_steps` as `float`, and
`get_warmup_steps` branches on `>= 1`, not `> 0`, so `0.1` takes the
`math.ceil(N * 0.1)` fraction branch (training_args.py:2089 in v5.12.1).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 docs/optimizer_search_space_config.schema.json | 2 ++
 src/autointent/configs/_transformers.py        | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/optimizer_search_space_config.schema.json b/docs/optimizer_search_space_config.schema.json
index df8560ea3..f31195dcf 100644
--- a/docs/optimizer_search_space_config.schema.json
+++ b/docs/optimizer_search_space_config.schema.json
@@ -1302,6 +1302,8 @@
                 },
                 "warmup_ratio": {
                     "default": 0.1,
+                    "exclusiveMaximum": 1,
+                    "minimum": 0,
                     "title": "Warmup Ratio",
                     "type": "number"
                 },
diff --git a/src/autointent/configs/_transformers.py b/src/autointent/configs/_transformers.py
index 55a4a88e7..b06a323b5 100644
--- a/src/autointent/configs/_transformers.py
+++ b/src/autointent/configs/_transformers.py
@@ -29,9 +29,9 @@ class EmbedderFineTuningConfig(BaseModel):
     margin: float = Field(default=0.5)
     learning_rate: float = Field(default=2e-5)
     # Fed to TrainingArguments.warmup_steps in v5, which interprets float<1 as
-    # a fraction of total steps and float>=1 as a raw step count. Restrict to
-    # (0, 1) so warmup_ratio=1.0 doesn't silently become a single step.
-    warmup_ratio: float = Field(default=0.1, gt=0, lt=1)
+    # a fraction of total steps and float>=1 as a raw step count. Cap below 1
+    # so warmup_ratio=1.0 doesn't silently become a single step.
+    warmup_ratio: float = Field(default=0.1, ge=0, lt=1)
     early_stopping_patience: int = Field(default=1)
     early_stopping_threshold: float = Field(default=0.0)
     val_fraction: float = Field(default=0.2)

From 5dfc75d6aca7d224e4953bd13afd86d59b3b7bc0 Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sun, 28 Jun 2026 00:04:04 +0300
Subject: [PATCH 4/7] fix(deps): satisfy huggingface_hub v1 strict validation
 and mypy on v5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- _bert.py: coerce label2id keys and id2label values to str (id2label
  keys stay int). huggingface_hub 1.x strict dataclass validation
  rejects int-keyed label2id with StrictDataclassFieldValidationError;
  the v5 AutoModelForSequenceClassification.from_pretrained pipeline
  now routes through that validator, so the previous {int: int}
  mappings raised on every BertScorer.fit (and cascaded into a fallback
  hf_hub_download call that the test guard caught as 'unpinned').
- ranker.py: cast cross_encoder[0] to Any for auto_model.classifier
  access (nn.Sequential.__getitem__ is typed Tensor | Module on v5);
  add arg-type ignores on CrossEncoder.predict(list[tuple[str,str]])
  calls — the v5 stub demands the much wider Sequence type but the
  list-of-pairs form is the documented call shape.
- Drop type: ignore comments mypy now reports as unused
  (AutoTokenizer.from_pretrained gained a typed stub in transformers
  v5; max_length matches TokenizerConfig.max_length cleanly).
- conftest.py: SentenceTransformer's constructor is typed Any on v5,
  so add no-any-return ignore at the fixture boundary.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/autointent/_dump_tools/unit_dumpers.py |  2 +-
 src/autointent/_wrappers/ranker.py         | 13 ++++++-------
 src/autointent/modules/scoring/_bert.py    |  6 +++---
 tests/conftest.py                          |  2 +-
 tests/modules/test_dumper.py               |  3 +--
 5 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/autointent/_dump_tools/unit_dumpers.py b/src/autointent/_dump_tools/unit_dumpers.py
index 2b1c201bb..ca3cda488 100644
--- a/src/autointent/_dump_tools/unit_dumpers.py
+++ b/src/autointent/_dump_tools/unit_dumpers.py
@@ -292,7 +292,7 @@ def load(path: Path, **kwargs: Any) -> PreTrainedTokenizer | PreTrainedTokenizer
         require("transformers")
         import transformers
 
-        return transformers.AutoTokenizer.from_pretrained(path)  # type: ignore[no-any-return,no-untyped-call]
+        return transformers.AutoTokenizer.from_pretrained(path)
 
     @classmethod
     def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
diff --git a/src/autointent/_wrappers/ranker.py b/src/autointent/_wrappers/ranker.py
index cf07e9406..fd0656122 100644
--- a/src/autointent/_wrappers/ranker.py
+++ b/src/autointent/_wrappers/ranker.py
@@ -12,7 +12,7 @@
 import logging
 from pathlib import Path
 from random import shuffle
-from typing import TYPE_CHECKING, Any, Literal, TypedDict
+from typing import TYPE_CHECKING, Any, Literal, TypedDict, cast
 
 import joblib
 import numpy as np
@@ -127,7 +127,7 @@ def __init__(
             revision=self.config.revision,
             trust_remote_code=self.config.trust_remote_code,
             device=self.config.device,
-            max_length=self.config.tokenizer_config.max_length,  # type: ignore[arg-type]
+            max_length=self.config.tokenizer_config.max_length,
         )
         self._train_head = False
         self._clf = classifier_head
@@ -138,9 +138,8 @@ def __init__(
             self._activations_list: list[npt.NDArray[Any]] = []
             # CrossEncoder is a nn.Sequential of modules; [0] is the Transformer
             # wrapping the HF model exposed as .auto_model.
-            self._hook_handler = self.cross_encoder[0].auto_model.classifier.register_forward_hook(
-                self._classifier_hook
-            )
+            transformer = cast("Any", self.cross_encoder[0])
+            self._hook_handler = transformer.auto_model.classifier.register_forward_hook(self._classifier_hook)
 
     def _classifier_hook(self, _module, input_tensor, _output_tensor) -> None:  # type: ignore[no-untyped-def] # noqa: ANN001
         """Hook to capture classifier activations.
@@ -165,13 +164,13 @@ def _get_features_or_predictions(self, pairs: list[tuple[str, str]]) -> npt.NDAr
         if not self._train_head:
             return np.array(
                 self.cross_encoder.predict(
-                    pairs,
+                    pairs,  # type: ignore[arg-type]
                     batch_size=self.config.batch_size,
                     activation_fn=nn.Sigmoid() if self.output_range == "sigmoid" else nn.Tanh(),
                 )
             )
 
-        self.cross_encoder.predict(pairs, batch_size=self.config.batch_size)
+        self.cross_encoder.predict(pairs, batch_size=self.config.batch_size)  # type: ignore[arg-type]
         res = np.concatenate(self._activations_list, axis=0)
         self._activations_list.clear()
         return res  # type: ignore[no-any-return]
diff --git a/src/autointent/modules/scoring/_bert.py b/src/autointent/modules/scoring/_bert.py
index 816fd863e..055437334 100644
--- a/src/autointent/modules/scoring/_bert.py
+++ b/src/autointent/modules/scoring/_bert.py
@@ -130,8 +130,8 @@ def get_implicit_initialization_params(self) -> dict[str, Any]:
     def _initialize_model(self) -> Any:  # noqa: ANN401
         from transformers import AutoModelForSequenceClassification
 
-        label2id = {i: i for i in range(self._n_classes)}
-        id2label = {i: i for i in range(self._n_classes)}
+        label2id = {str(i): i for i in range(self._n_classes)}
+        id2label = {i: str(i) for i in range(self._n_classes)}
 
         return AutoModelForSequenceClassification.from_pretrained(
             self.classification_model_config.model_name,
@@ -152,7 +152,7 @@ def fit(
 
         self._validate_task(labels)
 
-        self._tokenizer = AutoTokenizer.from_pretrained(  # type: ignore[no-untyped-call]
+        self._tokenizer = AutoTokenizer.from_pretrained(
             self.classification_model_config.model_name, revision=self.classification_model_config.revision
         )
         self._model = self._initialize_model()
diff --git a/tests/conftest.py b/tests/conftest.py
index 397fd2142..ab6f4eba2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -159,7 +159,7 @@ def tiny_sentence_transformer() -> SentenceTransformer:
 
     from autointent.configs._pinned_revisions import DEFAULT_REVISIONS
 
-    return SentenceTransformer(TINY_SENTENCE_TRANSFORMER, revision=DEFAULT_REVISIONS[TINY_SENTENCE_TRANSFORMER])
+    return SentenceTransformer(TINY_SENTENCE_TRANSFORMER, revision=DEFAULT_REVISIONS[TINY_SENTENCE_TRANSFORMER])  # type: ignore[no-any-return]
 
 
 def apply_test_models(pipeline: Pipeline) -> None:
diff --git a/tests/modules/test_dumper.py b/tests/modules/test_dumper.py
index 52a60e88b..188c9ac35 100644
--- a/tests/modules/test_dumper.py
+++ b/tests/modules/test_dumper.py
@@ -41,8 +41,7 @@ class TestTransformers:
     def init_attributes(self) -> None:
         from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
-        # reason: transformers AutoTokenizer.from_pretrained is untyped in stubs
-        self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # type: ignore[no-untyped-call]
+        self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
         self._tokenizer_predictions = np.array(self.tokenizer(["hello", "world"]).input_ids)
         self.transformer = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
 

From 35e639450e04d73d75ff0d5eaedb751d4e1e2458 Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sun, 28 Jun 2026 00:09:34 +0300
Subject: [PATCH 5/7] docs(_bert): explain str-key label2id requirement to
 prevent regression

A future refactor could mistake `{str(i): i}` for a no-op coercion and
"simplify" it back to `{i: i}`; mypy would still pass, but BertScorer.fit
would raise StrictDataclassFieldValidationError at runtime. The comment
makes the WHY explicit at the call site, matching the WHY-only comment
policy from 14f9576c.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/autointent/modules/scoring/_bert.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/autointent/modules/scoring/_bert.py b/src/autointent/modules/scoring/_bert.py
index 055437334..188cf29da 100644
--- a/src/autointent/modules/scoring/_bert.py
+++ b/src/autointent/modules/scoring/_bert.py
@@ -130,6 +130,9 @@ def get_implicit_initialization_params(self) -> dict[str, Any]:
     def _initialize_model(self) -> Any:  # noqa: ANN401
         from transformers import AutoModelForSequenceClassification
 
+        # huggingface_hub v1 StrictDataclass requires label2id keys to be str
+        # (and id2label values to be str); int-keyed dicts raise
+        # StrictDataclassFieldValidationError on from_pretrained in v5.
         label2id = {str(i): i for i in range(self._n_classes)}
         id2label = {i: str(i) for i in range(self._n_classes)}
 

From 8b25d2787e3f715d03e30138c687fab746b499ca Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sun, 28 Jun 2026 00:25:00 +0300
Subject: [PATCH 6/7] fix(_bert): pin revision on transformers v5 PEFT adapter
 probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When PEFT is installed, transformers v5 calls find_adapter_config_file
on every AutoModelForSequenceClassification.from_pretrained. The
auto_factory only propagates `_commit_hash` (used for the cache
lookup) but NOT the outer `revision` to the fall-through
hf_hub_download. On a cache that is cold for this file — i.e. after our
CI warm-cache job, which populates model files but writes no negative
(`.no_exist`) marker for adapter_config.json — that probe fires
`hf_hub_download(repo_id, adapter_config.json, revision=None)` and our
test guard rightly flags it as unpinned.

Pass `adapter_kwargs={"revision": revision}` so the adapter probe
inherits the pin. The first run still writes a `.no_exist` marker, but
all subsequent runs (and CI's pinned-only contract) stay clean.

Reproduces with: rm -rf ~/.cache/huggingface/hub/models--prajjwal1--bert-tiny/.no_exist
then pytest tests/pipeline/test_inference.py::test_inference_from_config[multiclass].

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/autointent/modules/scoring/_bert.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/autointent/modules/scoring/_bert.py b/src/autointent/modules/scoring/_bert.py
index 188cf29da..419113907 100644
--- a/src/autointent/modules/scoring/_bert.py
+++ b/src/autointent/modules/scoring/_bert.py
@@ -136,14 +136,21 @@ def _initialize_model(self) -> Any:  # noqa: ANN401
         label2id = {str(i): i for i in range(self._n_classes)}
         id2label = {i: str(i) for i in range(self._n_classes)}
 
+        # transformers v5 + PEFT triggers find_adapter_config_file on every
+        # from_pretrained; it propagates _commit_hash for the cache lookup but
+        # NOT the outer `revision` to the fall-through hf_hub_download
+        # (auto_factory.py:308 only forwards adapter_kwargs). Set revision
+        # explicitly via adapter_kwargs so the adapter probe stays pinned.
+        revision = self.classification_model_config.revision
         return AutoModelForSequenceClassification.from_pretrained(
             self.classification_model_config.model_name,
             trust_remote_code=self.classification_model_config.trust_remote_code,
-            revision=self.classification_model_config.revision,
+            revision=revision,
             num_labels=self._n_classes,
             label2id=label2id,
             id2label=id2label,
             problem_type="multi_label_classification" if self._multilabel else "single_label_classification",
+            adapter_kwargs={"revision": revision} if revision is not None else None,
         )
 
     def fit(

From f1369d18fc70ff369634c08b98bf997ee578c184 Mon Sep 17 00:00:00 2001
From: voorhs <ilya_alekseev_2016@list.ru>
Date: Sun, 28 Jun 2026 00:41:56 +0300
Subject: [PATCH 7/7] fix(lora,ptuning): short-circuit PEFT vocab-check that
 hits the hub
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PEFT's get_peft_model_state_dict (save_and_load.py:380-384) runs an
embedding-resize sanity check on every Trainer.save_checkpoint by
calling model.config.__class__.from_pretrained(base_model_name_or_path)
with no revision. transformers fills in revision='main' as the default,
so the call hits hf_hub_download('prajjwal1/bert-tiny',
'config.json', revision='main') — unpinned, which our CI guard
correctly flags. On a cold cache (CI), this trips on every
LoRA/PTuning trial that runs through Trainer.

Reset base_model_name_or_path to "" on the peft_config after
get_peft_model so the vocab check short-circuits on the empty model id
(the value is an empty string, not None, so this relies on the check
treating a falsy model_id as absent). Our dumper (PeftModelDumper /
HFModelDumper) saves the base model separately and the load path passes
it explicitly, so the adapter config doesn't need to remember it.

Reproduces with:
  rm -rf ~/.cache/huggingface/hub/models--prajjwal1--bert-tiny/.no_exist
  pytest tests/pipeline/test_inference.py::test_inference_from_config[multiclass]

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/autointent/modules/scoring/_lora/lora.py       | 10 +++++++++-
 src/autointent/modules/scoring/_ptuning/ptuning.py | 10 +++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/autointent/modules/scoring/_lora/lora.py b/src/autointent/modules/scoring/_lora/lora.py
index f310bd3eb..5c9829a94 100644
--- a/src/autointent/modules/scoring/_lora/lora.py
+++ b/src/autointent/modules/scoring/_lora/lora.py
@@ -120,7 +120,15 @@ def _initialize_model(self) -> Any:  # noqa: ANN401
         model = super()._initialize_model()
         from peft import get_peft_model
 
-        return get_peft_model(model, self._lora_config)
+        peft_model = get_peft_model(model, self._lora_config)
+        # PEFT's save_pretrained vocab-check (save_and_load.py:380-384) calls
+        # AutoConfig.from_pretrained(base_model_name_or_path) with no revision
+        # during every Trainer checkpoint. On a cold cache this falls through
+        # to an unpinned hf_hub_download. Clearing base_model_name_or_path
+        # short-circuits the check; our dumper saves the base model
+        # separately (HFModelDumper), so the adapter doesn't need to remember it.
+        peft_model.peft_config["default"].base_model_name_or_path = ""
+        return peft_model
 
     def dump(self, path: str) -> None:
         from peft import LoraConfig
diff --git a/src/autointent/modules/scoring/_ptuning/ptuning.py b/src/autointent/modules/scoring/_ptuning/ptuning.py
index 162ee502d..c443a5066 100644
--- a/src/autointent/modules/scoring/_ptuning/ptuning.py
+++ b/src/autointent/modules/scoring/_ptuning/ptuning.py
@@ -154,7 +154,15 @@ def _initialize_model(self) -> Any:  # noqa: ANN401
         model = super()._initialize_model()
         from peft import get_peft_model
 
-        return get_peft_model(model, self._ptuning_config)
+        peft_model = get_peft_model(model, self._ptuning_config)
+        # PEFT's save_pretrained vocab-check (save_and_load.py:380-384) calls
+        # AutoConfig.from_pretrained(base_model_name_or_path) with no revision
+        # during every Trainer checkpoint. On a cold cache this falls through
+        # to an unpinned hf_hub_download. Clearing base_model_name_or_path
+        # short-circuits the check; our dumper saves the base model
+        # separately (HFModelDumper), so the adapter doesn't need to remember it.
+        peft_model.peft_config["default"].base_model_name_or_path = ""
+        return peft_model
 
     def dump(self, path: str) -> None:
         from peft import PromptEncoderConfig