Skip to content

Commit 3544f0b

Browse files
committed
Add LFM2.5-VL export with CUDA/AOTI backend
Export LFM2.5-VL (450M and 1.6B) as a multi-method PTE with three methods (vision_encoder, token_embedding, text_decoder), all delegated to the CUDA/AOTI backend. New files under examples/models/lfm2_5_vl/: model, weight converter, export script, and config JSONs. Modifications to existing files are kept minimal: - examples/models/lfm2/short_conv.py: replace nn.Conv1d(groups=dim) call with manual pointwise multiply+sum. Triton has no template for depthwise conv1d with dynamic sequence length. Mutable buffer state for conv_state is unchanged — AOTI handles it via mark_static_address at export time, same mechanism as the KV cache in MHA. - exir/emit/_emitter.py: copy CUDA tensor storage to CPU before ctypes pointer read during constant serialization. Prevents segfault when exporting a model whose parameters live on CUDA. Tested on NVIDIA B300 (CUDA 13.0, torch 2.11): 333-400 decode tok/s, 435-454 prefill tok/s, coherent generation on text-only and vision-language prompts via both the Python pybindings and the llama_main C++ runner.
1 parent 36e8ed9 commit 3544f0b

8 files changed

Lines changed: 556 additions & 2 deletions

File tree

examples/models/lfm2/short_conv.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
7474
with torch.no_grad():
7575
self.conv_state.copy_(new_conv_state)
7676

77-
conv_out = self.conv(Bx)[..., : x.size(-1)] # (batch_size, dim, seq_len)
77+
# Manual depthwise conv: Triton has no template for nn.Conv1d with
78+
# groups=dim and dynamic seq_len. kernel_size is always 3.
79+
w = self.conv.weight[:, 0, :] # (dim, 3)
80+
conv_out = (
81+
Bx[..., :-2] * w[:, 0:1]
82+
+ Bx[..., 1:-1] * w[:, 1:2]
83+
+ Bx[..., 2:] * w[:, 2:3]
84+
) # (batch_size, dim, seq_len)
7885
y = C * conv_out # (batch_size, dim, seq_len)
7986

8087
y = y.transpose(-1, -2) # (batch_size, seq_len, dim)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Public API of the LFM2.5-VL example package: the model class and the
HuggingFace-to-ExecuTorch weight converter."""

from executorch.examples.models.lfm2_5_vl.convert_weights import convert_weights
from executorch.examples.models.lfm2_5_vl.model import Lfm2p5VlModel

__all__ = [
    "convert_weights",
    "Lfm2p5VlModel",
]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"dim": 2048,
3+
"ffn_dim_multiplier": 1,
4+
"hidden_dim": 8192,
5+
"n_heads": 32,
6+
"n_kv_heads": 8,
7+
"n_layers": 16,
8+
"norm_eps": 1e-5,
9+
"rope_theta": 1000000.0,
10+
"use_scaled_rope": false,
11+
"vocab_size": 65536,
12+
"use_hf_rope": true,
13+
"use_qk_norm": true,
14+
"qk_norm_before_rope": true,
15+
"layer_types": [
16+
"conv",
17+
"conv",
18+
"full_attention",
19+
"conv",
20+
"conv",
21+
"full_attention",
22+
"conv",
23+
"conv",
24+
"full_attention",
25+
"conv",
26+
"full_attention",
27+
"conv",
28+
"full_attention",
29+
"conv",
30+
"full_attention",
31+
"conv"
32+
]
33+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"dim": 1024,
3+
"ffn_dim_multiplier": 1,
4+
"hidden_dim": 4608,
5+
"n_heads": 16,
6+
"n_kv_heads": 8,
7+
"n_layers": 16,
8+
"norm_eps": 1e-5,
9+
"rope_theta": 1000000.0,
10+
"use_scaled_rope": false,
11+
"vocab_size": 65536,
12+
"use_hf_rope": true,
13+
"use_qk_norm": true,
14+
"qk_norm_before_rope": true,
15+
"layer_types": [
16+
"conv",
17+
"conv",
18+
"full_attention",
19+
"conv",
20+
"conv",
21+
"full_attention",
22+
"conv",
23+
"conv",
24+
"full_attention",
25+
"conv",
26+
"full_attention",
27+
"conv",
28+
"full_attention",
29+
"conv",
30+
"full_attention",
31+
"conv"
32+
]
33+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
"""Convert LFM2.5-VL text decoder weights from HuggingFace to ET format."""
8+
9+
from __future__ import annotations
10+
11+
import argparse
12+
from pathlib import Path
13+
14+
import torch
15+
from executorch.examples.models.checkpoint import get_mapped_key
16+
from safetensors.torch import load_file
17+
18+
# Maps HuggingFace LFM2.5-VL language-model parameter names to the Meta/ET
# (llama-style) names used by the ExecuTorch model. "{}" is a layer-index
# placeholder, resolved by get_mapped_key in lfm2_5_vl_to_meta.
_LFM2_5_VL_TO_META: dict[str, str] = {
    "model.language_model.embed_tokens.weight": "tok_embeddings.weight",
    "model.language_model.embedding_norm.weight": "norm.weight",
    "model.language_model.layers.{}.self_attn.q_proj.weight": "layers.{}.attention.wq.weight",
    "model.language_model.layers.{}.self_attn.k_proj.weight": "layers.{}.attention.wk.weight",
    "model.language_model.layers.{}.self_attn.v_proj.weight": "layers.{}.attention.wv.weight",
    "model.language_model.layers.{}.self_attn.out_proj.weight": "layers.{}.attention.wo.weight",
    "model.language_model.layers.{}.self_attn.q_layernorm.weight": "layers.{}.attention.q_norm_fn.weight",
    "model.language_model.layers.{}.self_attn.k_layernorm.weight": "layers.{}.attention.k_norm_fn.weight",
    "model.language_model.layers.{}.operator_norm.weight": "layers.{}.attention_norm.weight",
    "model.language_model.layers.{}.ffn_norm.weight": "layers.{}.ffn_norm.weight",
    "model.language_model.layers.{}.feed_forward.w1.weight": "layers.{}.feed_forward.w1.weight",
    "model.language_model.layers.{}.feed_forward.w2.weight": "layers.{}.feed_forward.w2.weight",
    "model.language_model.layers.{}.feed_forward.w3.weight": "layers.{}.feed_forward.w3.weight",
    "model.language_model.layers.{}.conv.conv.weight": "layers.{}.conv.conv.weight",
    "model.language_model.layers.{}.conv.out_proj.weight": "layers.{}.conv.out_proj.weight",
    "model.language_model.lm_head.weight": "output.weight",
}

# Order of the three equal row-chunks packed into each conv block's fused
# in_proj weight; lfm2_5_vl_to_meta splits them into separate tensors.
_IN_PROJ_SPLITS = ("B_proj", "C_proj", "x_proj")
38+
39+
40+
def lfm2_5_vl_to_meta(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
41+
"""Extract and remap language model weights from a full VL state dict."""
42+
converted: dict[str, torch.Tensor] = {}
43+
44+
for key, value in state_dict.items():
45+
if not key.startswith("model.language_model."):
46+
continue
47+
48+
try:
49+
new_key = get_mapped_key(key, _LFM2_5_VL_TO_META)
50+
except Exception:
51+
new_key = key.removeprefix("model.language_model.")
52+
53+
if new_key.endswith(".conv.in_proj.weight"):
54+
for name, chunk in zip(_IN_PROJ_SPLITS, torch.chunk(value, 3, dim=0)):
55+
converted[new_key.replace("in_proj", name)] = chunk
56+
else:
57+
converted[new_key] = value
58+
59+
if "output.weight" not in converted:
60+
converted["output.weight"] = converted["tok_embeddings.weight"]
61+
62+
return converted
63+
64+
65+
def convert_weights(input_dir: str, output_file: str) -> None:
    """Load the HF safetensors checkpoint from *input_dir*, remap the
    language-model weights to ET naming, and save them as a .pt file."""
    checkpoint_path = Path(input_dir) / "model.safetensors"
    raw_state = load_file(str(checkpoint_path))
    remapped = lfm2_5_vl_to_meta(raw_state)
    torch.save(remapped, output_file)
    print(f"Saved {len(remapped)} tensors to {output_file}")
70+
71+
72+
def main() -> None:
    """CLI entry point: parse arguments and run the conversion."""
    arg_parser = argparse.ArgumentParser(
        description="Convert LFM2.5-VL weights to ET format."
    )
    arg_parser.add_argument("input_dir", help="Directory containing model.safetensors.")
    arg_parser.add_argument("output", help="Output .pt checkpoint path.")
    parsed = arg_parser.parse_args()
    convert_weights(parsed.input_dir, parsed.output)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)