Skip to content

Commit 0c2ff55

Browse files
psiddh, Github Executorch, and Copilot
authored
Arm backend: Remove old transform_for_cortex_m_backend and deprecate --enable_qdq_fusion_pass (#17740)
Summary: Remove the transform_for_cortex_m_backend() function and deprecate the --enable_qdq_fusion_pass CLI flag in aot_arm_compiler.py. Instead, ReplaceQuantNodesPass is now applied directly inside to_edge_TOSA_delegate() and to_edge_no_delegate(), making each compilation path self-contained rather than relying on a post-hoc fixup applied to all targets.

This is a prerequisite for PR #17075, which introduces Cortex-M as a first-class compilation target with its own dedicated pipeline.

cc @digantdesai @SS-JIA @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell

---------

Co-authored-by: Github Executorch <github_executorch@arm.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent bcfb551 commit 0c2ff55

1 file changed

Lines changed: 26 additions & 34 deletions

File tree

examples/arm/aot_arm_compiler.py

Lines changed: 26 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,6 @@
3838
from executorch.backends.arm.vgf import VgfCompileSpec
3939

4040
# To use Cortex-M backend
41-
from executorch.backends.cortex_m.passes.convert_to_cortex_m_pass import (
42-
ConvertToCortexMPass,
43-
)
44-
45-
from executorch.backends.cortex_m.passes.quantized_op_fusion_pass import (
46-
QuantizedOpFusionPass,
47-
)
48-
4941
from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
5042
ReplaceQuantNodesPass,
5143
)
@@ -207,6 +199,14 @@ def _load_serialized_model(
207199
return model, example_inputs
208200

209201

202+
def _apply_replace_quant_nodes(edge, args):
203+
"""Apply the replace_quant_nodes pass to the edge graph module."""
204+
205+
if args.target != "vgf" and not args.direct_drive:
206+
edge = edge.transform([ReplaceQuantNodesPass()])
207+
return edge
208+
209+
210210
def get_model_and_inputs_from_name(
211211
model_name: str, model_input: str | None
212212
) -> Tuple[torch.nn.Module, Any]:
@@ -606,7 +606,7 @@ def get_args():
606606
parser.add_argument(
607607
"--enable_qdq_fusion_pass",
608608
action="store_true",
609-
help="Enable the Quantized qdq fusion Op passes",
609+
help="[DEPRECATED] This flag is no longer used and will be removed in a future release.",
610610
)
611611
parser.add_argument(
612612
"--enable_debug_mode",
@@ -787,6 +787,11 @@ def to_edge_TOSA_delegate(
787787
),
788788
)
789789

790+
# Replace quantized_decomposed::{quantize,dequantize}_per_tensor nodes
791+
# with cortex_m:: equivalents for int8 QDQ ops remaining outside the
792+
# delegated subgraph.
793+
edge = _apply_replace_quant_nodes(edge, args)
794+
790795
return model_quant, edge
791796

792797

@@ -822,27 +827,12 @@ def to_edge_no_delegate(
822827
),
823828
)
824829

825-
return model_quant, edge
826-
827-
828-
def transform_for_cortex_m_backend(edge_program_manager, args):
829-
# Let's make sure we are using optimized Cortex M backend
830-
# NB: If we can't find and replace ops those are expected to be replaced,
831-
# bad things will happen at runtime, like "missing operator" errors!
830+
# Replace quantized_decomposed::{quantize,dequantize}_per_tensor nodes
831+
# with cortex_m:: equivalents for int8 QDQ ops remaining outside the
832+
# delegated subgraph.
833+
edge = _apply_replace_quant_nodes(edge, args)
832834

833-
# Instantiate the mandatory ReplaceQuantNodesPass
834-
passes = [ReplaceQuantNodesPass]
835-
if args.enable_qdq_fusion_pass:
836-
passes += [ConvertToCortexMPass, QuantizedOpFusionPass]
837-
current_edge = edge_program_manager
838-
for pass_cls in passes:
839-
transform_pass = (
840-
pass_cls(current_edge.exported_program())
841-
if pass_cls.__name__ == "QuantizedLinearFusionPass"
842-
else pass_cls()
843-
)
844-
current_edge = current_edge.transform([transform_pass])
845-
return current_edge
835+
return model_quant, edge
846836

847837

848838
if __name__ == "__main__": # noqa: C901
@@ -863,6 +853,13 @@ def transform_for_cortex_m_backend(edge_program_manager, args):
863853
model = exported_program.module()
864854
model_fp32 = model
865855

856+
if args.enable_qdq_fusion_pass:
857+
logging.warning(
858+
"--enable_qdq_fusion_pass is deprecated and has no effect. "
859+
"Quantized node replacement is now handled within the "
860+
"respective compilation paths."
861+
)
862+
866863
model_name = os.path.basename(os.path.splitext(args.model_name)[0])
867864
if args.intermediates:
868865
os.makedirs(args.intermediates, exist_ok=True)
@@ -885,11 +882,6 @@ def transform_for_cortex_m_backend(edge_program_manager, args):
885882
exported_program, args, model, example_inputs
886883
)
887884

888-
# Cortex-m ops are never included in vgf or direct-drive
889-
if args.target != "vgf" and not args.direct_drive:
890-
# Transform so we can use ops from the Cortex M backend
891-
edge = transform_for_cortex_m_backend(edge, args)
892-
893885
dump_delegation_info(edge, args.intermediates)
894886

895887
edge_program_manager_copy = copy.deepcopy(edge)

0 commit comments

Comments
 (0)