Skip to content

Commit 3e9f625

Browse files
committed
Update base for Update on "[ET Device Support] Schema changes: device info on Tensor and buffer-level device array"
This diff adds device placement information to the ExecuTorch schema to support representing tensor-level device type information, which will be the basic requirement for the following tensor_parser updates. This is part of the Phase 1 implementation to make ET device type work E2E without user-specified device placement. Design doc: https://docs.google.com/document/d/1lwd9BlohmwkN5EEvRulO_b-XnZBwv1nMb5l2K3jfuwA/edit?tab=t.0#heading=h.o6anuvkix4bu Differential Revision: [D93635657](https://our.internmc.facebook.com/intern/diff/D93635657/) [ghstack-poisoned]
2 parents fcc8f9a + 9076110 commit 3e9f625

39 files changed

Lines changed: 1520 additions & 131 deletions

.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ jobs:
132132
# Build executor_runner (needed by CUDA backend e2e tests)
133133
cmake --build cmake-out --target executor_runner
134134
135-
# Run all CUDA backend Python tests (including chunk_gated_delta e2e)
135+
# Run CUDA backend Python tests
136136
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
137137
138138
export-model-cuda-artifact:

.github/workflows/pull.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,36 @@ jobs:
607607
exit 1
608608
fi
609609
610+
test-mcu-cortex-m-backend:
611+
name: test-mcu-cortex-m-backend
612+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
613+
permissions:
614+
id-token: write
615+
contents: read
616+
with:
617+
runner: linux.2xlarge.memory
618+
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
619+
submodules: 'recursive'
620+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
621+
timeout: 120
622+
script: |
623+
# The generic Linux job chooses to use base env, not the one setup by the image
624+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
625+
conda activate "${CONDA_ENV}"
626+
627+
source .ci/scripts/utils.sh
628+
install_executorch "--use-pt-pinned-commit"
629+
630+
# Install arm dependencies
631+
.ci/scripts/setup-arm-baremetal-tools.sh
632+
source examples/arm/arm-scratch/setup_path.sh
633+
634+
# To build cortex-m test runner
635+
backends/cortex_m/test/build_test_runner.sh
636+
637+
# To run cortex_m tests
638+
pytest --config-file=backends/arm/test/pytest.ini backends/cortex_m/test
639+
610640
android:
611641
uses: ./.github/workflows/_android.yml
612642
permissions:

.github/workflows/trunk.yml

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,33 +1054,3 @@ jobs:
10541054
10551055
.ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
10561056
}"
1057-
1058-
test-mcu-cortex-m-backend:
1059-
name: test-mcu-cortex-m-backend
1060-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1061-
permissions:
1062-
id-token: write
1063-
contents: read
1064-
with:
1065-
runner: linux.2xlarge.memory
1066-
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
1067-
submodules: 'recursive'
1068-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1069-
timeout: 120
1070-
script: |
1071-
# The generic Linux job chooses to use base env, not the one setup by the image
1072-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1073-
conda activate "${CONDA_ENV}"
1074-
1075-
source .ci/scripts/utils.sh
1076-
install_executorch "--use-pt-pinned-commit"
1077-
1078-
# Install arm dependencies
1079-
.ci/scripts/setup-arm-baremetal-tools.sh
1080-
source examples/arm/arm-scratch/setup_path.sh
1081-
1082-
# To build cortex-m test runner
1083-
backends/cortex_m/test/build_test_runner.sh
1084-
1085-
# To run cortex_m tests
1086-
pytest --config-file=backends/arm/test/pytest.ini backends/cortex_m/test

backends/arm/MODELS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
<!-- Copyright 2025-2026 Arm Limited and/or its affiliates. -->
22
# The following file contains all models that have been confirmed to be functional and tested for the Arm backend:
3+
# Note: Deep AutoEncoder requires manual Linear+BatchNorm1d fusion as the quantizer does not yet support this pattern.
4+
# Note: DS CNN requires AvgPool2d workaround for Ethos-U55 due to stride > 3 limitation.
35
- Conformer
6+
- Deep AutoEncoder
47
- Deit Tiny
58
- DeepLab v3 (DL3)
9+
- DS CNN
610
- Inception v3 (IC3)
711
- Llama
812
- Gemma3n
913
- Long Short-Term Memory (LSTM)
14+
- MobileNet V1 0.25
1015
- MobileNet v2 (MV2)
1116
- MobileNet v3 (MV3)
1217
- Some popular torch.nn.functional models (NN functional)
@@ -16,6 +21,7 @@
1621
- Neural Super Sampler (NSS)
1722
- Phi-3
1823
- ResNet 18
24+
- ResNet-8
1925
- Wav2Letter (W2L)
2026
- Stable Diffusion:
2127
* CLIP Text Encoder (CLIP Text with Projection)
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
"""Ethos-U FVP tests for the MLPerf Tiny anomaly detection Deep AutoEncoder."""
6+
7+
from typing import Tuple
8+
9+
import pytest
10+
import torch
11+
import torch.nn as nn
12+
from executorch.backends.arm.test import common
13+
from executorch.backends.arm.test.tester.test_pipeline import (
14+
EthosU55PipelineINT,
15+
EthosU85PipelineINT,
16+
TosaPipelineFP,
17+
TosaPipelineINT,
18+
)
19+
20+
from executorch.examples.models.mlperf_tiny import DeepAutoEncoderModel
21+
from torch.nn.utils.fusion import fuse_linear_bn_eval
22+
23+
24+
def _fuse_linear_bn(mod: nn.Module) -> nn.Module:
25+
"""Fuse Linear + BatchNorm1d pairs in the model.
26+
27+
The TOSA quantizer does not annotate linear+batch_norm patterns, so we fold
28+
the BatchNorm1d into the preceding Linear before export.
29+
TODO: Remove once the quantizer supports linear+bn.
30+
31+
"""
32+
if not isinstance(mod, nn.Sequential):
33+
for name, child in mod.named_children():
34+
setattr(mod, name, _fuse_linear_bn(child))
35+
return mod
36+
new_layers = []
37+
layers = list(mod)
38+
i = 0
39+
while i < len(layers):
40+
if (
41+
isinstance(layers[i], nn.Linear)
42+
and i + 1 < len(layers)
43+
and isinstance(layers[i + 1], nn.BatchNorm1d)
44+
):
45+
new_layers.append(fuse_linear_bn_eval(layers[i], layers[i + 1])) # type: ignore[type-var, arg-type]
46+
i += 2
47+
else:
48+
new_layers.append(_fuse_linear_bn(layers[i]))
49+
i += 1
50+
return nn.Sequential(*new_layers)
51+
52+
53+
# Model under test: MLPerf Tiny anomaly-detection Deep AutoEncoder.
_wrapper = DeepAutoEncoderModel()
# Fold BatchNorm1d into the preceding Linear before export; the TOSA
# quantizer does not yet annotate the linear+batch_norm pattern.
model = _fuse_linear_bn(_wrapper.get_eager_model())
model_inputs = _wrapper.get_example_inputs()
# Input signature used to parameterize the test pipelines.
input_t = Tuple[torch.Tensor]

# Parametrization: run each INT test with and without per-channel quantization.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}
62+
63+
64+
def test_deep_autoencoder_tosa_FP():
    """Lower the fused Deep AutoEncoder to TOSA FP and run the pipeline."""
    fp_kwargs = dict(
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    fp_pipeline = TosaPipelineFP[input_t](model, model_inputs, **fp_kwargs)
    fp_pipeline.run()
73+
74+
75+
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_tosa_INT(per_channel_quantization):
    """Quantize the fused Deep AutoEncoder, lower to TOSA INT, and run it."""
    int_kwargs = dict(
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
        frobenius_threshold=None,
        cosine_threshold=None,
    )
    int_pipeline = TosaPipelineINT[input_t](model, model_inputs, **int_kwargs)
    int_pipeline.run()
90+
91+
92+
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u55_INT(per_channel_quantization):
    """Run the quantized Deep AutoEncoder end-to-end on the Ethos-U55 FVP."""
    u55_kwargs = dict(
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    u55_pipeline = EthosU55PipelineINT[input_t](model, model_inputs, **u55_kwargs)
    u55_pipeline.run()
107+
108+
109+
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u85_INT(per_channel_quantization):
    """Run the quantized Deep AutoEncoder end-to-end on the Ethos-U85 FVP."""
    u85_kwargs = dict(
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    u85_pipeline = EthosU85PipelineINT[input_t](model, model_inputs, **u85_kwargs)
    u85_pipeline.run()
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
"""Ethos-U FVP tests for the MLPerf Tiny Keyword Spotting DS-CNN model."""
6+
7+
from typing import Tuple
8+
9+
import pytest
10+
import torch
11+
import torch.nn as nn
12+
from executorch.backends.arm.test import common
13+
from executorch.backends.arm.test.tester.test_pipeline import (
14+
EthosU55PipelineINT,
15+
EthosU85PipelineINT,
16+
TosaPipelineFP,
17+
TosaPipelineINT,
18+
)
19+
20+
from executorch.examples.models.mlperf_tiny import DSCNNKWSModel
21+
22+
# Model under test: MLPerf Tiny Keyword Spotting DS-CNN.
_wrapper = DSCNNKWSModel()
model = _wrapper.get_eager_model()
# TODO: Remove once a pass decomposes large-stride AvgPool2d.
# Replace AvgPool2d(24,5) with AdaptiveAvgPool2d(1) so the
# DecomposeAdaptiveAvgPool2dPass can break it into stride-1
# pools that satisfy the Ethos-U55 stride <= 3 constraint.
# NOTE(review): assumes AdaptiveAvgPool2d(1) is numerically equivalent to the
# original pool over the full spatial extent — confirm against model input size.
model.pool = nn.AdaptiveAvgPool2d(output_size=1)  # type: ignore[assignment]
model_inputs = _wrapper.get_example_inputs()
# Input signature used to parameterize the test pipelines.
input_t = Tuple[torch.Tensor]

# Parametrization: run each INT test with and without per-channel quantization.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}
36+
37+
38+
def test_ds_cnn_tosa_FP():
    """Lower the DS-CNN keyword-spotting model to TOSA FP and run it."""
    fp_kwargs = dict(
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    fp_pipeline = TosaPipelineFP[input_t](model, model_inputs, **fp_kwargs)
    fp_pipeline.run()
47+
48+
49+
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_tosa_INT(per_channel_quantization):
    """Quantize the DS-CNN model, lower to TOSA INT, and run the pipeline."""
    int_kwargs = dict(
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
        frobenius_threshold=None,
        cosine_threshold=None,
    )
    int_pipeline = TosaPipelineINT[input_t](model, model_inputs, **int_kwargs)
    int_pipeline.run()
64+
65+
66+
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u55_INT(per_channel_quantization):
    """Run the quantized DS-CNN model end-to-end on the Ethos-U55 FVP."""
    u55_kwargs = dict(
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    u55_pipeline = EthosU55PipelineINT[input_t](model, model_inputs, **u55_kwargs)
    u55_pipeline.run()
81+
82+
83+
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u85_INT(per_channel_quantization):
    """Run the quantized DS-CNN model end-to-end on the Ethos-U85 FVP."""
    u85_kwargs = dict(
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    u85_pipeline = EthosU85PipelineINT[input_t](model, model_inputs, **u85_kwargs)
    u85_pipeline.run()

0 commit comments

Comments
 (0)