Commit 7fd4dab

feat(lfm2_5_vl): add LFM2.5-VL-450M support

Bundle an architecture config for the 450M checkpoint (dim=1024, hidden_dim=4608, same layer_types as 1.6B) and auto-select the right params JSON from --model_dir in export_lfm2_5_vl.py. The existing convert_weights and model.py are already dim-agnostic, so no code changes are needed on the loading path.

Verified by exporting lfm2_5_vl_450m_quantized_xnnpack.pte (619MB, fp32 vision encoder + 8da4w decoder + int8 embedding). The 450M shares the same tokenizer, EOS/BOS tokens, and chat template as the 1.6B, so the C++ runner needs no changes.

Authored with Claude Code.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 341e36e commit 7fd4dab

3 files changed

Lines changed: 92 additions & 20 deletions

examples/models/lfm2_5_vl/README.md

Lines changed: 17 additions & 6 deletions
@@ -1,35 +1,46 @@
 # LFM2.5-VL ExecuTorch Export

-Export [LiquidAI/LFM2-VL-1.6B](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) to ExecuTorch as a single multi-method PTE compatible with the LLaVA C++ runner.
+Export the LFM2.5-VL family to ExecuTorch as a single multi-method PTE compatible with the LLaVA C++ runner. Both checkpoints are supported and share the same export path:
+
+- [LiquidAI/LFM2-VL-1.6B](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) — text dim 2048
+- [LiquidAI/LFM2.5-VL-450M](https://huggingface.co/LiquidAI/LFM2.5-VL-450M) — text dim 1024

 LFM2.5-VL is a **hybrid SSM+attention vision-language model** — 16 decoder layers alternating between short convolution blocks and full attention blocks, paired with a SigLIP ViT vision encoder.

 ## Architecture

-Three named methods in one PTE:
+Three named methods in one PTE (`D` = text hidden dim: 2048 for 1.6B, 1024 for 450M):

 | Method | Input | Output |
 |--------|-------|--------|
-| `vision_encoder` | `[1, 3, 512, 512]` float32 NCHW pixels [0,255] | `[1, 256, 2048]` float32 |
-| `token_embedding` | `[1, seq_len]` int64 token IDs | `[1, seq_len, 2048]` float32 |
-| `text_decoder` | `([1, seq_len, 2048]` float32, `[seq_len]` int64) | `[1, 65536]` float32 |
+| `vision_encoder` | `[1, 3, 512, 512]` float32 NCHW pixels [0,255] | `[1, 256, D]` float32 |
+| `token_embedding` | `[1, seq_len]` int64 token IDs | `[1, seq_len, D]` float32 |
+| `text_decoder` | `([1, seq_len, D]` float32, `[seq_len]` int64) | `[1, 65536]` float32 |

 ## Export

 ```bash
+# 1.6B (default)
 python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
     --model_dir LiquidAI/LFM2-VL-1.6B \
     --dtype fp32
+
+# 450M — bundled config is auto-selected from --model_dir
+python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
+    --model_dir LiquidAI/LFM2.5-VL-450M \
+    --dtype fp32
 ```

 With quantization (8da4w decoder + int8 embedding + float32 vision encoder):

 ```bash
 python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
-    --model_dir LiquidAI/LFM2-VL-1.6B \
+    --model_dir LiquidAI/LFM2.5-VL-450M \
     --quantize
 ```

+The bundled architecture configs live in [config/](config/). Pass `--params /path/to/custom.json` to override.
+
 ### Required runner configuration

 - Resize image to exactly 512×512
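The I/O contract in the table above can be stated as a small shape helper. This is a hypothetical sketch for illustration only (`expected_output_shape` is not part of the repo); the shapes and method names come straight from the table:

```python
# Hypothetical helper mirroring the PTE method table above; not part of the repo.
# d is the text hidden dim: 2048 for LFM2-VL-1.6B, 1024 for LFM2.5-VL-450M.
def expected_output_shape(method: str, d: int, seq_len: int = 1) -> tuple:
    """Return the documented output shape for each named PTE method."""
    if method == "vision_encoder":
        return (1, 256, d)      # 256 image tokens, each of width d
    if method == "token_embedding":
        return (1, seq_len, d)  # one embedding row per input token
    if method == "text_decoder":
        return (1, 65536)       # logits over the shared 65536-token vocab
    raise ValueError(f"unknown method: {method}")

# The 450M differs from the 1.6B only in d; the vocab size is shared.
assert expected_output_shape("vision_encoder", 1024) == (1, 256, 1024)
assert expected_output_shape("text_decoder", 1024) == expected_output_shape("text_decoder", 2048)
```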
Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+{
+    "dim": 1024,
+    "ffn_dim_multiplier": 1,
+    "hidden_dim": 4608,
+    "n_heads": 16,
+    "n_kv_heads": 8,
+    "n_layers": 16,
+    "norm_eps": 1e-5,
+    "rope_theta": 1000000.0,
+    "use_scaled_rope": false,
+    "vocab_size": 65536,
+    "use_hf_rope": true,
+    "use_qk_norm": true,
+    "qk_norm_before_rope": true,
+    "layer_types": [
+        "conv",
+        "conv",
+        "full_attention",
+        "conv",
+        "conv",
+        "full_attention",
+        "conv",
+        "conv",
+        "full_attention",
+        "conv",
+        "full_attention",
+        "conv",
+        "full_attention",
+        "conv",
+        "full_attention",
+        "conv"
+    ]
+}
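A config like the one above can be sanity-checked in a few lines. This is a standalone sketch (the dict literal just restates fields from the JSON shown; the invariant checks are illustrative assumptions, not repo code):

```python
# Standalone sanity check for the 450M architecture config shown above.
config = {
    "dim": 1024,
    "n_heads": 16,
    "n_kv_heads": 8,
    "n_layers": 16,
    "layer_types": [
        "conv", "conv", "full_attention", "conv", "conv", "full_attention",
        "conv", "conv", "full_attention", "conv", "full_attention", "conv",
        "full_attention", "conv", "full_attention", "conv",
    ],
}

# 16 decoder layers alternating short-conv and full-attention blocks.
assert len(config["layer_types"]) == config["n_layers"]
assert config["layer_types"].count("full_attention") == 6
assert config["layer_types"].count("conv") == 10

# GQA: query heads must split evenly over KV heads; dim over query heads.
assert config["n_heads"] % config["n_kv_heads"] == 0
assert config["dim"] % config["n_heads"] == 0  # head dim 64
```

The layer_types list is identical to the 1.6B's, matching the commit message; only dim and hidden_dim shrink.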

examples/models/lfm2_5_vl/export_lfm2_5_vl.py

Lines changed: 42 additions & 14 deletions
@@ -5,21 +5,25 @@
 # LICENSE file in the root directory of this source tree.

 """
-Export LFM2.5-VL-1.6B as a single multi-method PTE for ExecuTorch's
-generic MultimodalRunner (C++ llava_main).
+Export LFM2.5-VL as a single multi-method PTE for ExecuTorch's generic
+MultimodalRunner (C++ llava_main). Supports both LFM2.5-VL-1.6B (text dim
+2048) and LFM2.5-VL-450M (text dim 1024); the architecture config is picked
+from the bundled config/ directory based on --model_dir, or you can pass
+--params to point at a custom JSON.

-Methods:
-    vision_encoder  : [1, 3, 512, 512] f32 NCHW pixels [0,255] -> [1, 256, 2048] f32
-    token_embedding : [1, seq_len] i64 -> [1, seq_len, 2048] f32
-    text_decoder    : ([1, seq_len, 2048] f32, [seq_len] i64) -> [1, 65536] f32
+Methods (D = text hidden dim: 2048 for 1.6B, 1024 for 450M):
+    vision_encoder  : [1, 3, 512, 512] f32 NCHW pixels [0,255] -> [1, 256, D] f32
+    token_embedding : [1, seq_len] i64 -> [1, seq_len, D] f32
+    text_decoder    : ([1, seq_len, D] f32, [seq_len] i64) -> [1, 65536] f32

 Usage:
     python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
-        --model_dir /path/to/LFM2-VL-1.6B \
+        --model_dir LiquidAI/LFM2.5-VL-450M \
         [--dtype fp32|fp16] [--quantize] [--output lfm2_5_vl_xnnpack.pte]
 """

 import logging
+import os
 from argparse import ArgumentParser, BooleanOptionalAction
 from typing import Optional

@@ -66,6 +70,23 @@
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)

+_CONFIG_DIR = os.path.join(os.path.dirname(__file__), "config")
+
+
+def _resolve_params_path(model_dir: str, params: Optional[str]) -> Optional[str]:
+    """Pick a bundled config based on model_dir if --params was not provided.
+
+    Returns None to fall back to model.py's default (1.6B).
+    """
+    if params is not None:
+        return params
+    name = model_dir.lower()
+    if "450m" in name:
+        return os.path.join(_CONFIG_DIR, "lfm2_5_vl_450m_config.json")
+    if "1.6b" in name or "1_6b" in name:
+        return os.path.join(_CONFIG_DIR, "lfm2_5_vl_1_6b_config.json")
+    return None
+

 class Lfm2p5VlEdgeManager(LLMEdgeManager):
     """LLMEdgeManager subclass for LFM2.5-VL.
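The matching rules of `_resolve_params_path` can be illustrated standalone. The sketch below restates only the substring matching (the hypothetical `pick_config` returns bare filenames; the real function also joins the bundled `_CONFIG_DIR` path):

```python
from typing import Optional

# Illustrative restatement of _resolve_params_path's matching rules only;
# pick_config is a hypothetical helper, not the repo function.
def pick_config(model_dir: str) -> Optional[str]:
    name = model_dir.lower()
    if "450m" in name:
        return "lfm2_5_vl_450m_config.json"
    if "1.6b" in name or "1_6b" in name:
        return "lfm2_5_vl_1_6b_config.json"
    return None  # caller falls back to model.py's default (1.6B)

# HF model IDs and local checkout paths both match case-insensitively.
assert pick_config("LiquidAI/LFM2.5-VL-450M") == "lfm2_5_vl_450m_config.json"
assert pick_config("/ckpts/lfm2-vl-1.6b") == "lfm2_5_vl_1_6b_config.json"
assert pick_config("some/other/model") is None
```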
@@ -354,11 +375,14 @@ def export_all(


 def main():
-    parser = ArgumentParser(description="Export LFM2.5-VL-1.6B to ExecuTorch")
+    parser = ArgumentParser(description="Export LFM2.5-VL to ExecuTorch")
     parser.add_argument(
         "--model_dir",
         default="LiquidAI/LFM2-VL-1.6B",
-        help="HuggingFace model ID or local path",
+        help=(
+            "HuggingFace model ID or local path. Supported: "
+            "LiquidAI/LFM2-VL-1.6B, LiquidAI/LFM2.5-VL-450M."
+        ),
     )
     parser.add_argument(
         "--dtype",
@@ -388,8 +412,8 @@ def main():
         "--params",
         default=None,
         help=(
-            "Path to model params JSON (architecture config). "
-            "Defaults to the bundled config/lfm2_5_vl_1_6b_config.json."
+            "Path to model params JSON (architecture config). When omitted, "
+            "the bundled 1.6B or 450M config is selected from --model_dir."
         ),
     )
     parser.add_argument(
@@ -400,8 +424,12 @@ def main():
     args = parser.parse_args()

     dtype = DType.fp16 if args.dtype == "fp16" else DType.fp32
-    suffix = ("_fp16" if dtype == DType.fp16 else "") + (
-        "_quantized" if args.quantize else ""
+    params_path = _resolve_params_path(args.model_dir, args.params)
+    size_tag = "_450m" if (params_path or "").endswith("450m_config.json") else ""
+    suffix = (
+        size_tag
+        + ("_fp16" if dtype == DType.fp16 else "")
+        + ("_quantized" if args.quantize else "")
     )
     output = args.output or f"lfm2_5_vl{suffix}_xnnpack.pte"
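The suffix logic in this hunk composes the default output name from the size tag, dtype, and quantization flags. A standalone sketch (`output_name` is a hypothetical helper mirroring the lines in `main()`, not repo code):

```python
# Hypothetical standalone restatement of the output-name composition in main().
# fp16/quantize mirror the --dtype fp16 and --quantize CLI flags.
def output_name(params_path, fp16=False, quantize=False):
    size_tag = "_450m" if (params_path or "").endswith("450m_config.json") else ""
    suffix = size_tag + ("_fp16" if fp16 else "") + ("_quantized" if quantize else "")
    return f"lfm2_5_vl{suffix}_xnnpack.pte"

# Matches the artifact named in the commit message.
assert output_name("config/lfm2_5_vl_450m_config.json", quantize=True) == \
    "lfm2_5_vl_450m_quantized_xnnpack.pte"
# 1.6B (or no resolved config) gets no size tag, preserving the old names.
assert output_name(None) == "lfm2_5_vl_xnnpack.pte"
```

Tagging only the 450M keeps the 1.6B's existing artifact names stable.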

@@ -412,7 +440,7 @@ def main():
         args.quantize,
         args.max_seq_len,
         args.max_context_len,
-        args.params,
+        params_path,
     )

