chore: fix has_tied_word_embeddings for pipeline parallelism

le1nux · le1nux · commit 6d4d3ca89d3c · 2026-06-28T00:06:02.000+02:00
diff --git a/src/modalities/models/gpt2/gpt2_model.py b/src/modalities/models/gpt2/gpt2_model.py
@@ -940,9 +940,13 @@ def __init__(
 
     @property
     def has_tied_word_embeddings(self) -> bool:
-        token_embedding_weight = getattr(self.transformer.wte, "weight", None)
-        lm_head_weight = getattr(self.transformer.lm_head, "weight", None)
-        return token_embedding_weight is not None and token_embedding_weight is lm_head_weight
+        # In pipeline parallelism, a stage's transformer may not contain the wte/lm_head submodules
+        # (e.g. a middle stage has neither). Such a stage has no tying to report, so return False when
+        # either submodule is absent. Whether tied embeddings are allowed at all (for PP, they are not)
+        # is enforced separately by the pipeline/TP config validators on the whole, unsplit model.
+        if "wte" not in self.transformer or "lm_head" not in self.transformer:
+            return False
+        return self.transformer.wte.weight is self.transformer.lm_head.weight
 
     @overload
     def forward(self, inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
diff --git a/tests/test_weight_tying.py b/tests/test_weight_tying.py
@@ -149,13 +149,13 @@ def test_has_tied_word_embeddings_requires_model_capability():
         has_tied_word_embeddings(nn.Linear(1, 1))
 
 
-@pytest.mark.parametrize("module_name", ["transformer", "wte", "lm_head"])
+@pytest.mark.parametrize("module_name", ["wte", "lm_head"])
 def test_has_tied_word_embeddings_handles_pipeline_stage(module_name: str):
+    # In pipeline parallelism, a stage's transformer ModuleDict contains only the submodules assigned
+    # to that stage (the transformer container itself is always present), so a stage may lack wte
+    # and/or lm_head. Such a stage has no tying to report and must not raise.
     model = create_gpt2_model(use_weight_tying=True)
-    if module_name == "transformer":
-        del model.transformer
-    else:
-        del model.transformer[module_name]
+    del model.transformer[module_name]
 
     assert has_tied_word_embeddings(model) is False
 
diff --git a/tutorials/instruction_tuning/experiments/.gitkeep b/tutorials/instruction_tuning/experiments/.gitkeep