Skip to content

Commit 341e36e

Browse files
feat: vision encoder should always be exported with float32
1 parent 00fc22d commit 341e36e

2 files changed

Lines changed: 4 additions & 12 deletions

File tree

examples/models/lfm2_5_vl/README.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,14 @@ python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
2222
--dtype fp32
2323
```
2424

25-
With quantization (8da4w decoder + int8 embedding):
25+
With quantization (8da4w decoder + int8 embedding + float32 vision encoder):
2626

2727
```bash
2828
python examples/models/lfm2_5_vl/export_lfm2_5_vl.py \
2929
--model_dir LiquidAI/LFM2-VL-1.6B \
3030
--quantize
3131
```
3232

33-
## Runner Compatibility
34-
35-
The exported PTE is compatible with `llava_main` (the ExecuTorch multimodal C++ runner). Method names match `extension/llm/runner/constants.h`.
36-
3733
### Required runner configuration
3834

3935
- Resize image to exactly 512×512

examples/models/lfm2_5_vl/export_lfm2_5_vl.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,7 @@ def export(self) -> "Lfm2p5VlEdgeManager":
8888
return self
8989

9090

91-
def export_image_encoder(
92-
lfm2, quantize: bool = False
93-
) -> torch.export.ExportedProgram:
91+
def export_image_encoder(lfm2, quantize: bool = False) -> torch.export.ExportedProgram:
9492
"""Export vision encoder as 'vision_encoder' method.
9593
9694
Input: [1, 3, 512, 512] float32 NCHW pixels in [0, 255]
@@ -120,9 +118,7 @@ def forward(self, images: torch.Tensor) -> torch.Tensor:
120118

121119
if quantize:
122120
logging.info("Exporting vision encoder (int8 dynamic quantized)...")
123-
quantizer = XNNPACKQuantizer().set_global(
124-
get_symmetric_quantization_config()
125-
)
121+
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
126122
manager = (
127123
Lfm2p5VlEdgeManager(
128124
model=encoder,
@@ -285,7 +281,7 @@ def export_all(
285281
lfm2 = lfm2.to(dtype.to_torch_dtype())
286282

287283
logging.info("[1/3] Exporting vision encoder...")
288-
vision_ep = export_image_encoder(lfm2, quantize)
284+
vision_ep = export_image_encoder(lfm2, quantize=False)
289285

290286
# Text decoder MUST come before token embedding (see export_token_embedding docstring)
291287
logging.info("[2/3] Exporting text decoder...")

0 commit comments

Comments (0)