merge

stephantul · stephantul · commit 39b3881a30be · 2025-09-07T20:37:30.000+02:00
diff --git a/model2vec/train/base.py b/model2vec/train/base.py
@@ -25,6 +25,7 @@ def __init__(
         pad_id: int = 0,
         token_mapping: list[int] | None = None,
         weights: torch.Tensor | None = None,
+        freeze: bool = False,
     ) -> None:
         """
         Initialize a trainable StaticModel from a StaticModel.
@@ -35,6 +36,7 @@ def __init__(
         :param pad_id: The padding id. This is set to 0 in almost all model2vec models
         :param token_mapping: The token mapping. If None, the token mapping is set to the range of the number of vectors.
         :param weights: The weights of the model. If None, the weights are initialized to zeros.
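+        :param freeze: Whether to freeze the embeddings. When True, the token weights created by `construct_weights` are also not trainable; explicitly passed `weights` remain trainable. This should be set to False in most cases.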
         """
         super().__init__()
         self.pad_id = pad_id
@@ -54,7 +56,8 @@ def __init__(
         else:
             self.token_mapping = torch.arange(len(vectors), dtype=torch.int64)
         self.token_mapping = nn.Parameter(self.token_mapping, requires_grad=False)
-        self.embeddings = nn.Embedding.from_pretrained(vectors.clone(), freeze=False, padding_idx=pad_id)
+        self.freeze = freeze
+        self.embeddings = nn.Embedding.from_pretrained(vectors.clone(), freeze=self.freeze, padding_idx=pad_id)
         self.head = self.construct_head()
         self.w = self.construct_weights() if weights is None else nn.Parameter(weights, requires_grad=True)
         self.tokenizer = tokenizer
@@ -63,7 +66,7 @@ def construct_weights(self) -> nn.Parameter:
         """Construct the weights for the model."""
         weights = torch.zeros(len(self.token_mapping))
         weights[self.pad_id] = -10_000
-        return nn.Parameter(weights)
+        return nn.Parameter(weights, requires_grad=not self.freeze)
 
     def construct_head(self) -> nn.Sequential:
         """Method should be overridden for various other classes."""
diff --git a/model2vec/train/classifier.py b/model2vec/train/classifier.py
@@ -40,6 +40,7 @@ def __init__(
         pad_id: int = 0,
         token_mapping: list[int] | None = None,
         weights: torch.Tensor | None = None,
+        freeze: bool = False,
     ) -> None:
         """Initialize a standard classifier model."""
         self.n_layers = n_layers
@@ -55,6 +56,7 @@ def __init__(
             tokenizer=tokenizer,
             token_mapping=token_mapping,
             weights=weights,
+            freeze=freeze,
         )
 
     @property
@@ -133,7 +135,7 @@ def predict_proba(self, X: list[str], show_progress_bar: bool = False, batch_siz
                 pred.append(torch.softmax(logits, dim=1).cpu().numpy())
         return np.concatenate(pred, axis=0)
 
-    def fit(  #  noqa: C901  # Refactor later
+    def fit(  # noqa: C901  # Complexity is bad.
         self,
         X: list[str],
         y: LabelType,
@@ -309,7 +311,9 @@ def _initialize(self, y: LabelType) -> None:
         self.classes_ = classes
         self.out_dim = len(self.classes_)  # Update output dimension
         self.head = self.construct_head()
-        self.embeddings = nn.Embedding.from_pretrained(self.vectors.clone(), freeze=False, padding_idx=self.pad_id)
+        self.embeddings = nn.Embedding.from_pretrained(
+            self.vectors.clone(), freeze=self.freeze, padding_idx=self.pad_id
+        )
         self.w = self.construct_weights()
         self.train()