Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 39 additions & 23 deletions backends/arm/operator_support/ethos_u55_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

"""
import typing
from itertools import combinations

import torch
import torch.fx as fx
Expand Down Expand Up @@ -281,20 +282,37 @@ def __init__(self, reporter: WhyNoPartitionReporter):

_MAX_AXIS_PRODUCT = 65536

def axes_product(self, shape: shape_t) -> int:
"""Return the product of all axes in ``shape``.

def _max_product_axis(self, shape: shape_t):
"""
Args:
shape (shape_t): Shape.

Comment on lines +285 to 289
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_max_product_axis’s docstring is misleading/inaccurate: it says it “Return[s] shape padded to rank4” but the function actually returns a boolean, and it doesn’t pad the shape. Consider renaming the helper to reflect that it validates axis-product constraints (and add a -> bool return type), and update the docstring to describe the boolean semantics and the rule being enforced.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ACK will fix.

Returns:
int: Product of the axis sizes.

True if the TRANSPOSE can be run on the Ethos-U55
False if the TRANSPOSE cannot be run on the Ethos-U55

For a tensor of rank N, the product of any combination of
N - 2 axes must not exceed 65536. E.g. for a rank 4 tensor,
N*H, N*W, N*C, H*W, H*C and W*C must all be at most 65536 to
be able to run the TRANSPOSE on Ethos-U55.
The full TRANSPOSE requirements for the Ethos-U55 are listed in
https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/blob/main/SUPPORTED_OPS.md
"""
product = 1
for axes in shape:
product *= axes
return product
rank = len(shape)
if rank < 3:
product = 1
for idx in shape:
product *= idx
return product <= self._MAX_AXIS_PRODUCT

else:
for axes in combinations(range(rank), rank - 2):
product = 1
for idx in axes:
product *= shape[idx]
if product > self._MAX_AXIS_PRODUCT:
return False
return True

def _check_rank_constraints(
self,
Expand Down Expand Up @@ -322,11 +340,11 @@ def _check_rank_constraints(
output_rank = len(output_shape)

if input_rank > 4:
if self.axes_product(input_shape) > self._MAX_AXIS_PRODUCT:
if not (self._max_product_axis(input_shape)):
self.reporter.report_reject(
node,
f"Input may require transpose operator. No support for {input_shape=}, "
f"{dtype=}. Product of axes must be <={self._MAX_AXIS_PRODUCT}",
f"{dtype=}. Product of any rank - 2 axes must be <={self._MAX_AXIS_PRODUCT}",
)
return False
if dtype == torch.int32:
Expand All @@ -337,12 +355,12 @@ def _check_rank_constraints(
return False

if output_rank > 4:
if self.axes_product(output_shape) > self._MAX_AXIS_PRODUCT:
if not (self._max_product_axis(output_shape)):
shape = output_shape
self.reporter.report_reject(
node,
f"Operator may require transpose operator. No support for {shape=}, "
f"{dtype=}. Product of axes must be <={self._MAX_AXIS_PRODUCT}",
f"{dtype=}. Product of any rank - 2 axes must be <={self._MAX_AXIS_PRODUCT}",
)
return False
if dtype == torch.int32:
Expand Down Expand Up @@ -450,24 +468,22 @@ def _check_transpose_constraints(
)
return False

if (
needs_input_transpose
and self.axes_product(input_shape) > self._MAX_AXIS_PRODUCT
):
# For TRANSPOSE originating from a VIEW, we know we will only do
# NHWC -> NCHW or NCHW -> NHWC permutations, hence we only need to validate
# these two TRANSPOSEs. For the general case of any permutation on TRANSPOSE,
# we reason via the checks in EthosU55TransposeCheck
if needs_input_transpose and not (self._max_product_axis(input_shape)):
self.reporter.report_reject(
Comment on lines +471 to 476
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new _max_product_axis constraint for view-inserted transposes (checking all (rank-2)-axis products) is stricter than the existing U55 permute constraints encoded in EthosU55TransposeCheck for NHWC<->NCHW-like permutations (which only bounds N*H, W, and C). This divergence can lead to inconsistent accept/reject decisions between a VIEW-induced transpose and an explicit permute_copy. Consider reusing/factoring the transpose constraint logic so both paths enforce the same rule set for the relevant permutation(s).

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. The TRANSPOSE constraints will need to be updated in the future, but I would prefer to do that in a separate PR.

node,
f"Operator requires transpose operator. No support for {input_shape=}, "
f"{dtype=}. Product of axes must be <{self._MAX_AXIS_PRODUCT}",
f"{dtype=}. Product of any rank - 2 axes must be <={self._MAX_AXIS_PRODUCT}",
)
return False
if (
needs_output_transpose
and self.axes_product(output_shape) > self._MAX_AXIS_PRODUCT
):
if needs_output_transpose and not (self._max_product_axis(output_shape)):
self.reporter.report_reject(
node,
f"Operator requires transpose operator. No support for {output_shape=}, "
f"{dtype=}. Product of axes must be <{self._MAX_AXIS_PRODUCT}",
f"{dtype=}. Product of any rank - 2 axes must be <={self._MAX_AXIS_PRODUCT}",
)
return False

Expand Down
8 changes: 6 additions & 2 deletions backends/arm/test/ops/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class View(torch.nn.Module):
"rand_5d_5d": lambda: (torch.rand(1, 1, 4, 5, 6), (1, 1, 4, -1, 6)),
"rand_5d_3d": lambda: (torch.rand(1, 1, 4, 5, 6), (2, 3, -1)),
"rand_3d_5d": lambda: (torch.rand(4, 5, 6), (1, 1, 2, -1, 3)),
"rank4_rank3_large": lambda: (torch.rand(1, 256, 6, 48), (6, 48, 256)),
"rank5_rank4_large": lambda: (torch.rand(1, 256, 2, 3, 48), (1, 256, 6, 48)),
}

needs_transpose_tests_fp16 = {
Expand All @@ -65,8 +67,7 @@ class View(torch.nn.Module):
}

rank_product_too_large = {
"rand_4d_large": lambda: (torch.rand(1, 49, 16, 128), (1, 16, 49, 128)),
"rand_5d_large": lambda: (torch.rand(2, 25, 16, 8, 64), (2, 16, 25, 8, 64)),
"rand_5d_large": lambda: (torch.rand(2, 256, 512, 8, 64), (2, 512, 256, 8, 64)),
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rand_5d_large allocates a very large tensor (2×256×512×8×64 ≈ 134M elements ≈ 512MB in fp32), which is likely to cause OOMs or very slow CI runs. Consider using a much smaller shape that still violates the U55 transpose axis-product constraint (e.g. choose a rank-5 shape where at least one (rank-2) axis-product exceeds 65536 but total elements stay small).

Suggested change
"rand_5d_large": lambda: (torch.rand(2, 256, 512, 8, 64), (2, 512, 256, 8, 64)),
"rand_5d_large": lambda: (torch.rand(1, 257, 256, 1, 2), (1, 256, 257, 1, 2)),

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is expected. I am not testing that the model can run on the NPU; I am testing that the TRANSPOSE is correctly rejected. Because the product of any 2 pairs of axes must exceed 2**16 for the TRANSPOSE to be rejected, testing the rejection requires really big tensors.

}

def __init__(self, new_shape):
Expand Down Expand Up @@ -116,6 +117,9 @@ def test_view_u55_INT(test_data: Tuple):
aten_op,
exir_ops=[],
)
pipeline.change_args(
"check_not.exir", ["executorch_exir_dialects_edge__ops_aten_view_copy_default"]
)
pipeline.run()


Expand Down
Loading