Skip to content

Commit 0cd86ee

Browse files
author
Github Executorch
committed
Add Cortex-M as a first-class target in aot_arm_compiler
Previously, Cortex-M op conversion was applied as an afterthought to all non-vgf targets via transform_for_cortex_m_backend(). This made the flow hard to follow, used a bare EdgeCompileConfig that decomposed ops like linear into addmm (requiring unnecessary workarounds), and didn't use the CortexMQuantizer or CortexMPassManager.

Add a dedicated to_edge_cortex_m() path, selected via --target=cortex-m55+int8, that owns the full pipeline: CortexMQuantizer for INT8 quantization, correct EdgeCompileConfig with preserve_ops to prevent premature decomposition, and CortexMPassManager.pass_list for op conversion. Remove the old scattered transform_for_cortex_m_backend() function.

Verified that all ops are fully lowered to cortex_m::quantized_* operators for both MobileNetV2 (70 nodes) and MobileNetV3 (122 nodes). E2E inference was tested on an Alif E8 board.

Test Plan:
python3 -m examples.arm.aot_arm_compiler -m mv2 --target=cortex-m55+int8 --quantize --intermediates=./mv2_intermediates --output=./mv2_cortex_m.pte
python3 -m examples.arm.aot_arm_compiler -m mv3 --target=cortex-m55+int8 --quantize --intermediates=./mv3_intermediates --output=./mv3_cortex_m.pte
Also ran E2E inference on an Alif E8 board.
1 parent 0c2ff55 commit 0cd86ee

2 files changed

Lines changed: 92 additions & 4 deletions

File tree

backends/cortex_m/passes/convert_to_cortex_m_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _get_batch_size_from_conv(self, conv_node: torch.fx.Node):
7171

7272
def _get_linear_replacement(self, node):
7373
"""
74-
Let
74+
Let
7575
- yi be the output activations (y1, ... yn)
7676
- xj be the input activations (x1, ... xm)
7777
- wij be the weights (w11, ... wnm)

examples/arm/aot_arm_compiler.py

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@
3636
from executorch.backends.arm.util._factory import create_partitioner, create_quantizer
3737

3838
from executorch.backends.arm.vgf import VgfCompileSpec
39+
from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
3940

40-
# To use Cortex-M backend
4141
from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
4242
ReplaceQuantNodesPass,
4343
)
44+
from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
4445

4546
from executorch.devtools import generate_etrecord
4647
from executorch.devtools.backend_debug import get_delegation_info
@@ -396,6 +397,7 @@ def forward(self, x):
396397
"TOSA-1.0+INT",
397398
"TOSA-1.0+FP",
398399
"TOSA-1.0+INT+int16",
400+
"cortex-m55+int8",
399401
]
400402

401403

@@ -528,7 +530,7 @@ def get_args():
528530
required=False,
529531
default="ethos-u55-128",
530532
choices=TARGETS,
531-
help=f"For ArmBackend delegated models, pick the target, and therefore the instruction set generated. valid targets are {TARGETS}",
533+
help=f"Target backend. For delegated models: Ethos-U/VGF/TOSA variants. For non-delegated: cortex-m55+int8 (CMSIS-NN portable kernels). Valid targets: {TARGETS}",
532534
)
533535
parser.add_argument(
534536
"-e",
@@ -795,6 +797,75 @@ def to_edge_TOSA_delegate(
795797
return model_quant, edge
796798

797799

800+
def to_edge_cortex_m(
    exported_program: ExportedProgram,
    args,
    model: GraphModule,
    example_inputs: Tuple[torch.Tensor],
):
    """Lower a model to an edge program for Cortex-M without delegation.

    When ``args.quantize`` is set, the model and example inputs are moved to
    channels_last, the model is quantized with ``CortexMQuantizer`` (prepare,
    calibrate, convert) and then re-exported. Otherwise a warning is logged
    and the incoming ``exported_program`` is lowered unchanged. In both cases
    the result of ``CortexMPassManager.transform()`` replaces the "forward"
    edge program.

    Args:
        exported_program: Program to lower when quantization is disabled;
            replaced by a fresh export of the quantized model otherwise.
        args: Parsed command-line arguments. Reads ``quantize``,
            ``model_name``, ``evaluate``, ``evaluate_config`` and
            ``strict_export``.
        model: Graph module handed to the quantizer.
        example_inputs: Example inputs used for calibration and export.

    Returns:
        A ``(model_quant, edge)`` tuple, where ``model_quant`` is the
        converted quantized model, or ``None`` when quantization is disabled.
    """
    logging.info("Using Cortex-M/CMSIS-NN compilation path (no delegation)")

    def _to_channels_last(value):
        # Tuples are converted element-wise; non-tensor leaves pass through.
        if isinstance(value, tuple):
            return tuple(_to_channels_last(item) for item in value)
        if not isinstance(value, torch.Tensor):
            return value

        # Only 4-D tensors not already in channels_last layout are converted.
        already_channels_last = value.is_contiguous(
            memory_format=torch.channels_last
        )
        if value.dim() != 4 or already_channels_last:
            return value

        logging.warning(
            "Converting input tensor with shape %s to channels_last",
            list(value.shape),
        )
        return value.to(memory_format=torch.channels_last)

    # Stays None on the non-quantized path so the return needs no special case.
    model_quant = None

    if args.quantize:
        model = model.to(memory_format=torch.channels_last)
        example_inputs = tuple(_to_channels_last(inp) for inp in example_inputs)

        observed_model = prepare_pt2e(model, CortexMQuantizer())

        calibration_data = get_calibration_data(
            args.model_name, example_inputs, args.evaluate, args.evaluate_config
        )

        # Calibrate: a DataLoader yields (sample, label) pairs; anything else
        # is treated as one set of positional inputs.
        if isinstance(calibration_data, DataLoader):
            for batch, _ in calibration_data:
                observed_model(_to_channels_last(batch))
        else:
            observed_model(*[_to_channels_last(inp) for inp in calibration_data])

        model_quant = convert_pt2e(observed_model)

        # Re-export so the lowered program reflects the quantized graph.
        exported_program = torch.export.export(
            model_quant, example_inputs, strict=args.strict_export
        )
    else:
        logging.warning(
            "Quantization is DISABLED. Cortex-M typically requires quantization."
        )

    # Ops preserved so they are not decomposed during edge lowering.
    preserved_ops = [
        torch.ops.aten.linear.default,
        torch.ops.aten.hardsigmoid.default,
        torch.ops.aten.hardsigmoid_.default,
        torch.ops.aten.hardswish.default,
        torch.ops.aten.hardswish_.default,
    ]
    compile_config = EdgeCompileConfig(
        preserve_ops=preserved_ops,
        _check_ir_validity=False,
    )
    edge = to_edge_transform_and_lower(
        exported_program,
        compile_config=compile_config,
    )

    # Run the Cortex-M pass manager and install its output as "forward".
    cortex_m_passes = CortexMPassManager(edge.exported_program())
    edge._edge_programs["forward"] = cortex_m_passes.transform()

    return model_quant, edge
867+
868+
798869
def to_edge_no_delegate(
799870
exported_program: ExportedProgram,
800871
args,
@@ -873,7 +944,24 @@ def to_edge_no_delegate(
873944

874945
# Quantize if required
875946
model_quant = None
876-
if args.delegate:
947+
if args.target == "cortex-m55+int8":
948+
# Cortex-M path: CMSIS-NN portable kernels, no delegation
949+
if getattr(args, "evaluate", False):
950+
logging.error(
951+
"--evaluate is not supported for target 'cortex-m55+int8' "
952+
"because this path does not use a TOSA delegate."
953+
)
954+
sys.exit(1)
955+
if args.delegate:
956+
logging.warning(
957+
"--delegate is ignored for target 'cortex-m55+int8' "
958+
"(this target does not use delegated ops)."
959+
)
960+
args.delegate = False
961+
model_quant, edge = to_edge_cortex_m(
962+
exported_program, args, model, example_inputs
963+
)
964+
elif args.delegate:
877965
model_quant, edge = to_edge_TOSA_delegate(
878966
exported_program, args, model, example_inputs
879967
)

0 commit comments

Comments
 (0)