Skip to content

Commit eef30fd

Browse files
authored
Remove dep on torchtune for weight conversion (#17515)
### Summary

Remove the dependency on torchtune for weight conversion. After this change, torchtune is only used for model definitions in:

- phi-3-mini-lora
- llama3_2_vision (these can't be removed)

A few other checkpoint conversions still go through FullModelHFCheckpointer; that dependency can be removed in the next PR.

### Test plan

CI
1 parent 5a9b280 commit eef30fd

15 files changed

Lines changed: 55 additions & 23 deletions

File tree

examples/models/checkpoint.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import json
1111
import os
12+
import re
1213
from pathlib import Path
1314
from typing import Any, Dict, Optional
1415

@@ -112,3 +113,30 @@ def load_checkpoint_from_pytorch_model(input_dir: str) -> Dict:
112113
return state_dict
113114

114115
raise FileNotFoundError(f"Could not find pytorch_model checkpoint in {input_dir}")
116+
117+
118+
def get_mapped_key(key: str, mapping_dict: Dict[str, str]) -> str:
    """Map a state dict key using a mapping dictionary with "{}" layer number placeholders.

    Keys that contain a numeric layer segment (e.g. "layers.3.attn.weight") are
    abstracted to a template form ("layers.{}.attn.weight") for lookup in
    ``mapping_dict``, then the layer number is substituted back into the mapped
    result. Keys without a numeric segment are looked up directly.

    Args:
        key: A state dict key, possibly containing a ".<digits>" layer index.
        mapping_dict: Maps source-format keys (with "{}" in place of layer
            numbers) to destination-format keys (also with "{}" placeholders).

    Returns:
        The mapped key with the original layer number re-inserted.

    Raises:
        Exception: If the key is not present in ``mapping_dict``, or if a key
            appears to contain a layer number that cannot be located (e.g. the
            digit segment is not preceded by a dot).
    """
    try:
        # Checks if there is a layer # in the key
        if any(k.isdigit() for k in key.split(".")):
            # Replace layer number with "{}" to create key for lookup
            abstract_key = re.sub(r"(\.\d+)", ".{}", key)
            match = re.search(r"\.(\d+)", key)
            if match is None:
                # Reachable when a digit-only segment leads the key (no
                # preceding "."), so the ".<digits>" pattern never matches.
                raise Exception(
                    f'Error converting the state dict. Could not find layer number in key: "{key}". '
                    "Please make sure you're loading a checkpoint with the right format. "
                )
            layer_num = match.group(1)
            new_key = mapping_dict[abstract_key]
            # Fill the "{}" placeholder(s) with the first layer number found.
            new_key = new_key.format(layer_num)
        else:
            new_key = mapping_dict[key]
    except KeyError as e:
        raise Exception(
            f'Error converting the state dict. Found unexpected key: "{key}". '
            "Please make sure you're loading a checkpoint with the right format. "
        ) from e

    return new_key

examples/models/codegen/convert_weight.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import torch
66

7-
from torchtune.models.convert_weights import get_mapped_key
7+
from executorch.examples.models.checkpoint import get_mapped_key
88

99
# Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings.
1010
_HF__CODEGEN_2_FROM_META = {

examples/models/gemma/convert_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import Dict
66

77
import torch
8-
from safetensors.torch import load_file
98

10-
from torchtune.models.convert_weights import get_mapped_key
9+
from executorch.examples.models.checkpoint import get_mapped_key
10+
from safetensors.torch import load_file
1111

1212

1313
# Weight mappings from Gemma's checkpoint to ExecuTorch's transformer parameters.

examples/models/gemma2/convert_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
from typing import Dict
1111

1212
import torch
13-
from safetensors.torch import load_file
1413

15-
from torchtune.models.convert_weights import get_mapped_key
14+
from executorch.examples.models.checkpoint import get_mapped_key
15+
from safetensors.torch import load_file
1616

1717

1818
# Weight mappings from Gemma 2's checkpoint to ExecuTorch's transformer parameters.

examples/models/gemma3/convert_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import Dict
66

77
import torch
8-
from safetensors.torch import load_file
98

10-
from torchtune.models.convert_weights import get_mapped_key
9+
from executorch.examples.models.checkpoint import get_mapped_key
10+
from safetensors.torch import load_file
1111

1212

1313
# Weight mappings from Gemma 3's checkpoint to ExecuTorch's transformer parameters.

examples/models/glm/convert_weights.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from typing import Dict
44

55
import torch
6+
from executorch.examples.models.checkpoint import get_mapped_key
67
from safetensors.torch import load_file
7-
from torchtune.models.convert_weights import get_mapped_key
88

99
# Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings.
1010
_GLM_FROM_META = {

examples/models/granite/convert_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import Dict
66

77
import torch
8-
from safetensors.torch import load_file
98

10-
from torchtune.models.convert_weights import get_mapped_key
9+
from executorch.examples.models.checkpoint import get_mapped_key
10+
from safetensors.torch import load_file
1111

1212

1313
# Weight mappings from Granite 3's checkpoint to ExecuTorch's transformer parameters.

examples/models/internvl3/convert_weights.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
from typing import Dict
33

44
import torch
5+
from executorch.examples.models.checkpoint import get_mapped_key
56

67
from executorch.examples.models.smollm3.convert_weights import load_checkpoint
7-
from torchtune.models.convert_weights import get_mapped_key
88

99
# Standard _FROM_META weight mapping of Meta weights to TorchTune + additional bias weight mappings.
1010
_INTERNVL_TO_META = {

examples/models/lfm2/convert_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from typing import Dict
44

55
import torch
6-
from safetensors.torch import load_file
76

8-
from torchtune.models.convert_weights import get_mapped_key
7+
from executorch.examples.models.checkpoint import get_mapped_key
8+
from safetensors.torch import load_file
99

1010
_LFM_2_TO_META = {
1111
"model.embed_tokens.weight": "tok_embeddings.weight",

examples/models/llama/convert_weights.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from typing import Dict
22

33
import torch
4+
from executorch.examples.models.checkpoint import get_mapped_key
45

56
from safetensors.torch import load_file
6-
from torchtune.models.convert_weights import get_mapped_key
77

88
_UNSLOTH_TO_META = {
99
"base_model.model.model.layers.{}.mlp.down_proj.lora_A.weight": "layers.{}.feed_forward.w2.lora_a.weight",

0 commit comments

Comments
 (0)