feat(lfm2_5_vl): add BUCK file, activation stats logging, and smoke tests

NorbertKlockiewicz · claude · NorbertKlockiewicz · commit 68a408a34638 · 2026-03-03T18:57:32.000+01:00
- BUCK: python_library + export_lib + binary targets following llava/BUCK
  and lfm2/BUCK patterns
- export_all: log non_const_buffer_sizes per execution plan (matches llava)
- export_all: add _return_program=True for tests (avoids writing to disk)
- test/test_lfm2_5_vl.py: vision encoder shape, prefill shape, export
  method names, and end-to-end prefill+decode loop tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/examples/models/lfm2_5_vl/BUCK b/examples/models/lfm2_5_vl/BUCK
@@ -0,0 +1,55 @@
+load("@fbcode_macros//build_defs:build_file_migration.bzl", "fbcode_target", "non_fbcode_target")
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+oncall("executorch")
+
+fbcode_target(_kind = runtime.python_library,
+    name = "lfm2_5_vl",  # model + weight-conversion sources, with the 1.6B config bundled as a resource
+    srcs = [
+        "__init__.py",
+        "convert_weights.py",
+        "model.py",
+    ],
+    resources = {
+        "config/lfm2_5_vl_1_6b_config.json": "config/lfm2_5_vl_1_6b_config.json",
+    },
+    base_module = "executorch.examples.models.lfm2_5_vl",
+    visibility = ["PUBLIC"],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/examples/models/llama:transformer_modules",
+        "//executorch/examples/models/llama:export_library",
+        "fbsource//third-party/pypi/safetensors:safetensors",
+        "fbsource//third-party/pypi/transformers:transformers",
+    ],
+)
+
+fbcode_target(_kind = runtime.python_library,
+    name = "export_lib",  # export script packaged as a library; visible to //executorch/... only
+    srcs = [
+        "export_lfm2_5_vl.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.lfm2_5_vl",
+    visibility = [
+        "//executorch/...",
+    ],
+    deps = [
+        ":lfm2_5_vl",
+    ],
+)
+
+fbcode_target(_kind = runtime.python_binary,
+    name = "export",  # runnable entry point: export_lfm2_5_vl.main
+    main_function = "executorch.examples.models.lfm2_5_vl.export_lfm2_5_vl.main",
+    preload_deps = [  # NOTE(review): presumably preloaded so custom/quantized ops are registered before export; confirm
+        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
+        "//executorch/kernels/quantized:aot_lib",
+    ],
+    deps = [
+        ":export_lib",
+    ],
+)
diff --git a/examples/models/lfm2_5_vl/export_lfm2_5_vl.py b/examples/models/lfm2_5_vl/export_lfm2_5_vl.py
@@ -232,13 +232,14 @@ def export_token_embedding(
 
 def export_all(
     model_dir: str,
-    output: str,
+    output: Optional[str],
     dtype: DType = DType.fp32,
     quantize: bool = False,
     max_seq_len: int = MAX_SEQ_LEN,
     max_context_len: int = MAX_SEQ_LEN,
     params_path: Optional[str] = None,
-) -> None:
+    _return_program: bool = False,
+):
     logging.info(f"Loading {model_dir}...")
     lfm2_model = Lfm2p5VlModel(
         model_dir=model_dir,
@@ -309,8 +310,16 @@ def export_all(
         )
     )
 
+    for execution_plan in et_program._emitter_output.program.execution_plan:
+        logging.info(
+            f"Required memory for activation in bytes: {execution_plan.non_const_buffer_sizes}"
+        )
+
+    if _return_program:
+        return et_program
+
     logging.info(f"Saving {output}...")
-    with open(output, "wb") as f:
+    with open(output, "wb") as f:  # type: ignore[arg-type]
         et_program.write_to_file(f)
     logging.info(f"Saved {output}. Methods: {et_program.methods}")
 
diff --git a/examples/models/lfm2_5_vl/test/test_lfm2_5_vl.py b/examples/models/lfm2_5_vl/test/test_lfm2_5_vl.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import unittest
+
+import torch
+from executorch.examples.models.lfm2_5_vl.export_lfm2_5_vl import export_all
+from executorch.examples.models.lfm2_5_vl.model import IMAGE_SIZE, MAX_SEQ_LEN, Lfm2p5VlModel
+
+# import order matters: portable_lib must come first so its static op registry
+# is in place before custom_ops registers against it.
+from executorch.extension.pybindings.portable_lib import (  # noqa # usort: skip
+    _load_for_executorch_from_buffer,
+)
+from executorch.extension.llm.custom_ops import custom_ops  # noqa # usort: skip
+from executorch.kernels import quantized  # noqa # usort: skip
+
+logging.basicConfig(level=logging.INFO)  # surface export_all's logging.info output during the test run
+logger = logging.getLogger(__name__)
+
+MODEL_DIR = "LiquidAI/LFM2-VL-1.6B"  # model_dir handed to Lfm2p5VlModel and export_all below
+
+
+class TestLfm2p5Vl(unittest.TestCase):  # smoke tests for the eager model and the exported program
+    @classmethod
+    def setUpClass(cls):  # build the model wrapper and eager model once for the whole class
+        cls.lfm2_model = Lfm2p5VlModel(model_dir=MODEL_DIR)
+        cls.lfm2 = cls.lfm2_model.get_eager_model().eval()
+
+    def test_vision_encoder_shape(self):
+        """image_embedding on a [1, 3, IMAGE_SIZE, IMAGE_SIZE] input must return [1, 256, 2048]."""
+        pixels = torch.randint(0, 256, (1, 3, IMAGE_SIZE, IMAGE_SIZE), dtype=torch.float32)
+        with torch.no_grad():
+            embeds = self.lfm2.image_embedding(pixels)
+        self.assertEqual(embeds.shape, (1, 256, 2048))
+
+    def test_prefill_output_shape(self):
+        """Prefill must return an int seq_len and logits whose last dimension is 65536."""
+        prompt_before, pixels, prompt_after = self.lfm2_model.get_inputs_for_prefill()
+        with torch.no_grad():
+            seq_len, logits = self.lfm2.prefill(prompt_before, pixels, prompt_after)
+        self.assertIsInstance(seq_len, int)
+        self.assertEqual(logits.shape[-1], 65536)
+
+    def test_export_methods(self):
+        """Exported program must expose vision_encoder, token_embedding and text_decoder."""
+        et_program = export_all(
+            model_dir=MODEL_DIR,
+            output=None,  # in-memory only
+            _return_program=True,
+        )
+        self.assertIn("vision_encoder", et_program.methods)
+        self.assertIn("token_embedding", et_program.methods)
+        self.assertIn("text_decoder", et_program.methods)
+
+    def test_export_and_run(self):
+        """Export in memory, load from the buffer, and run prefill plus three argmax decode steps."""
+        et_program = export_all(
+            model_dir=MODEL_DIR,
+            output=None,
+            _return_program=True,
+        )
+        module = _load_for_executorch_from_buffer(et_program.buffer)
+
+        prompt_before, pixels, prompt_after = self.lfm2_model.get_inputs_for_prefill()
+        start_pos = 0  # running position; advanced after each text_decoder prefill call
+
+        # Embed and prefill tokens before image
+        before_embeds = module.run_method("token_embedding", (prompt_before,))[0]
+        module.run_method(
+            "text_decoder",
+            (before_embeds, torch.arange(start_pos, start_pos + before_embeds.shape[1])),
+        )
+        start_pos += before_embeds.shape[1]
+
+        # Encode the image and prefill its embeddings
+        image_embeds = module.run_method("vision_encoder", (pixels,))[0]
+        module.run_method(
+            "text_decoder",
+            (image_embeds, torch.arange(start_pos, start_pos + image_embeds.shape[1])),
+        )
+        start_pos += image_embeds.shape[1]
+
+        # Embed and prefill tokens after image
+        after_embeds = module.run_method("token_embedding", (prompt_after,))[0]
+        logits = module.run_method(
+            "text_decoder",
+            (after_embeds, torch.arange(start_pos, start_pos + after_embeds.shape[1])),
+        )[0]
+        start_pos += after_embeds.shape[1]
+
+        # Decode three more tokens by argmax; only checks that every ID falls in [0, 65536)
+        new_tokens = [torch.argmax(logits).item()]  # no dim given: index into the flattened logits
+        for i in range(3):
+            token_embed = module.run_method(
+                "token_embedding",
+                (torch.tensor([[new_tokens[i]]], dtype=torch.int64),),
+            )[0]
+            logits = module.run_method(
+                "text_decoder",
+                (token_embed, torch.tensor([start_pos + i], dtype=torch.int64)),
+            )[0]
+            new_tokens.append(torch.argmax(logits).item())
+
+        self.assertEqual(len(new_tokens), 4)
+        for tok in new_tokens:
+            self.assertGreaterEqual(tok, 0)
+            self.assertLess(tok, 65536)
+
+
+if __name__ == "__main__":
+    unittest.main()