Remove legacy transform_for_cortex_m_backend and --enable_qdq_fusion_pass flag

Github Executorch · Github Executorch · commit 1805ef008006 · 2026-02-26T12:33:09.000-08:00
Summary: Remove the transform_for_cortex_m_backend() function and the --enable_qdq_fusion_pass CLI flag from aot_arm_compiler.py. The function applied Cortex-M passes as a post-hoc step in the main script for every target except VGF and direct-drive builds, which made the compilation flow hard to follow and coupled the delegation path to Cortex-M-specific logic. Instead, ReplaceQuantNodesPass is now applied directly inside to_edge_TOSA_delegate(), under the same guard (target is not "vgf" and direct-drive is not enabled), to handle any boundary quantized_decomposed::* nodes that remain outside the delegated subgraph. The optional ConvertToCortexMPass and QuantizedOpFusionPass, which were only run when --enable_qdq_fusion_pass was given, are no longer imported or applied by this script. This makes the delegation path self-contained and explicit about its runtime requirements. This change is in preparation for an upcoming PR (#17075) that introduces Cortex-M as a first-class compilation target with its own dedicated pipeline, including CortexMQuantizer and CortexMPassManager.
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
@@ -38,14 +38,6 @@
 from executorch.backends.arm.vgf import VgfCompileSpec
 
 # To use Cortex-M backend
-from executorch.backends.cortex_m.passes.convert_to_cortex_m_pass import (
-    ConvertToCortexMPass,
-)
-
-from executorch.backends.cortex_m.passes.quantized_op_fusion_pass import (
-    QuantizedOpFusionPass,
-)
-
 from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
     ReplaceQuantNodesPass,
 )
@@ -603,11 +595,6 @@ def get_args():
         action="store_false",
         help="Disable strict checking while exporting models.",
     )
-    parser.add_argument(
-        "--enable_qdq_fusion_pass",
-        action="store_true",
-        help="Enable the Quantized qdq fusion Op passes",
-    )
     parser.add_argument(
         "--enable_debug_mode",
         required=False,
@@ -787,6 +774,12 @@ def to_edge_TOSA_delegate(
         ),
     )
 
+    # Replace quantized_decomposed::* nodes with cortex_m::* equivalents for
+    # any QDQ ops that remain outside the delegated subgraph.
+    # Skip for targets/runtimes that do not ship Cortex-M kernels.
+    if args.target != "vgf" and not args.direct_drive:
+        edge = edge.transform([ReplaceQuantNodesPass()])
+
     return model_quant, edge
 
 
@@ -825,26 +818,6 @@ def to_edge_no_delegate(
     return model_quant, edge
 
 
-def transform_for_cortex_m_backend(edge_program_manager, args):
-    # Let's make sure we are using optimized Cortex M backend
-    # NB: If we can't find and replace ops those are expected to be replaced,
-    # bad things will happen at runtime, like "missing operator" errors!
-
-    # Instantiate the mandatory ReplaceQuantNodesPass
-    passes = [ReplaceQuantNodesPass]
-    if args.enable_qdq_fusion_pass:
-        passes += [ConvertToCortexMPass, QuantizedOpFusionPass]
-    current_edge = edge_program_manager
-    for pass_cls in passes:
-        transform_pass = (
-            pass_cls(current_edge.exported_program())
-            if pass_cls.__name__ == "QuantizedLinearFusionPass"
-            else pass_cls()
-        )
-        current_edge = current_edge.transform([transform_pass])
-    return current_edge
-
-
 if __name__ == "__main__":  # noqa: C901
     args = get_args()
 
@@ -885,11 +858,6 @@ def transform_for_cortex_m_backend(edge_program_manager, args):
             exported_program, args, model, example_inputs
         )
 
-    # Cortex-m ops are never included in vgf or direct-drive
-    if args.target != "vgf" and not args.direct_drive:
-        # Transform so we can use ops from the Cortex M backend
-        edge = transform_for_cortex_m_backend(edge, args)
-
     dump_delegation_info(edge, args.intermediates)
 
     edge_program_manager_copy = copy.deepcopy(edge)