[Cortex-M]: Add int8 I/O quantization to Cortex-M export path

Github Executorch · Github Executorch · commit 0bba44a7000f · 2026-03-03T17:58:05.000-08:00
Apply the QuantizeInputs and QuantizeOutputs passes in the Cortex-M
compilation path to strip the float-in/float-out wrapper from
quantized models. This produces a fully int8 model that accepts
and returns int8 tensors directly. The passes are applied to input
index 0 and output index 0 only.

The passes are applied after to_edge_transform_and_lower but before
CortexMPassManager, since the latter renames quantized_decomposed
ops to cortex_m variants, which the I/O quantization passes cannot
recognize.
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -47,6 +47,8 @@
 from executorch.devtools.backend_debug import get_delegation_info
 from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
 
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+
 from executorch.exir import (
     EdgeCompileConfig,
     ExecutorchBackendConfig,
@@ -860,6 +862,17 @@ def _to_channels_last(x):
         ),
     )
 
+    # Strip the float I/O wrapper from the quantized model to produce
+    # fully int8 inputs and outputs. This must run before CortexMPassManager
+    # which renames quantized_decomposed ops to cortex_m variants.
+    if args.quantize:
+        print("Applying passes to create a fully int8 quantized model...")
+
+        edge = edge.transform([
+            QuantizeInputs(edge, [0]),
+            QuantizeOutputs(edge, [0]),
+        ])
+
     pass_manager = CortexMPassManager(edge.exported_program())
     edge._edge_programs["forward"] = pass_manager.transform()