Update examples/arm/aot_arm_compiler.py

psiddh · Copilot · web-flow · commit ef0208629a25 · 2026-03-03T18:30:08.000-08:00
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -866,13 +866,26 @@ def _to_channels_last(x):
     # fully int8 inputs and outputs. This must run before CortexMPassManager
     # which renames quantized_decomposed ops to cortex_m variants.
     if args.quantize:
-        print("Applying passes to create a fully int8 quantized model...")
-
-        edge = edge.transform([
-            QuantizeInputs(edge, [0]),
-            QuantizeOutputs(edge, [0]),
-        ])
-
+        # When BundleIO is enabled, the bundled example_inputs/expected_outputs
+        # are still based on float I/O. Applying QuantizeInputs/QuantizeOutputs
+        # here would make the ExecuTorch program use int8 I/O while the bundled
+        # I/O remains float, causing dtype mismatches at runtime. Until the
+        # BundleIO path is updated to support int8 I/O (or to quantize the
+        # bundled data using recorded qparams), skip these passes when
+        # bundleio is enabled.
+        if getattr(args, "bundleio", False):
+            logging.warning(
+                "Skipping QuantizeInputs/QuantizeOutputs because --bundleio is "
+                "enabled and bundled I/O currently assumes float tensors."
+            )
+        else:
+            print("Applying passes to create a fully int8 quantized model...")
+            edge = edge.transform(
+                [
+                    QuantizeInputs(edge, [0]),
+                    QuantizeOutputs(edge, [0]),
+                ]
+            )
     pass_manager = CortexMPassManager(edge.exported_program())
     edge._edge_programs["forward"] = pass_manager.transform()