pytorch
diff --git a/‎.github/workflows/mlx.yml‎
Lines changed: 5 additions & 1 deletion b/‎.github/workflows/mlx.yml‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎backends/arm/TARGETS‎
Lines changed: 9 additions & 0 deletions b/‎backends/arm/TARGETS‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 4 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎backends/arm/_passes/aten_to_tosa_activation_functions.py‎
Lines changed: 18 additions & 0 deletions b/‎backends/arm/_passes/aten_to_tosa_activation_functions.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎backends/arm/_passes/aten_to_tosa_tensor_operators.py‎
Lines changed: 26 additions & 0 deletions b/‎backends/arm/_passes/aten_to_tosa_tensor_operators.py‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎backends/arm/_passes/exir_to_tosa_pass.py‎
Lines changed: 22 additions & 21 deletions b/‎backends/arm/_passes/exir_to_tosa_pass.py‎
Lines changed: 22 additions & 21 deletions
diff --git a/‎backends/arm/_passes/insert_dynamic_padding.py‎
Lines changed: 8 additions & 6 deletions b/‎backends/arm/_passes/insert_dynamic_padding.py‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎backends/arm/_passes/rewrite_conv_pass.py‎
Lines changed: 14 additions & 12 deletions b/‎backends/arm/_passes/rewrite_conv_pass.py‎
Lines changed: 14 additions & 12 deletions
@@ -66,7 +66,11 @@ jobs:
         echo "::endgroup::"
 
         echo "::group::Build test runners"
-        ${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner -j$(( $(sysctl -n hw.ncpu) - 1 ))
+        ${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner mlx_mutable_state_test -j$(( $(sysctl -n hw.ncpu) - 1 ))
+        echo "::endgroup::"
+
+        echo "::group::Run mutable-state (multi-session) unit test"
+        ./cmake-out/backends/mlx/test/mlx_mutable_state_test
         echo "::endgroup::"
 
         echo "::group::Run op unit tests"
 
@@ -816,6 +816,33 @@ jobs:
         # Test test_arm_backend.sh with test
         backends/arm/test/test_arm_backend.sh "${ARM_TEST}"
 
+  # Arm backend public API checks: installs ExecuTorch and the Arm bare-metal
+  # tools, then runs the public API manifest validation script followed by the
+  # public API backward-compatibility scenario runner.
+  test-arm-backend-public-api-backward-compatibility:
+    name: test-arm-backend-public-api-backward-compatibility
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge.memory
+      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      # For pull_request events use the PR head SHA; for any other event use
+      # the SHA of the triggering commit.
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        source .ci/scripts/utils.sh
+        install_executorch "--use-pt-pinned-commit"
+
+        .ci/scripts/setup-arm-baremetal-tools.sh --enable-mlsdk-deps --install-mlsdk-deps-with-pip
+        source examples/arm/arm-scratch/setup_path.sh
+
+        backends/arm/scripts/public_api_manifest/validate_all_public_api_manifests.sh
+
+        python backends/arm/test/public_api_bc/run_public_api_bc_scenarios.py
+
   test-llama-runner-qnn-linux:
     name: test-llama-runner-qnn-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
 
@@ -119,6 +119,15 @@ runtime.python_library(
         "//executorch/exir:lib",
     ],
 )
+# Library target for the package-level __init__.py, depending on the :ethosu
+# and :vgf targets and the quantizer library.
+# NOTE(review): presumably this is the target external users should depend on
+# for the Arm backend public API - confirm.
+runtime.python_library(
+    name = "public_api",
+    srcs = ["__init__.py"],
+    deps = [
+        ":ethosu",
+        ":vgf",
+        "//executorch/backends/arm/quantizer:lib",
+    ],
+)
 
 runtime.python_library(
     name = "process_node",
 
@@ -171,6 +171,7 @@
 from .rewrite_le_lt_to_ge_gt_pass import RewriteLeLtToGeGtPass  # noqa
 from .rewrite_matmul import RewriteMatmulPass  # noqa
 from .rewrite_max_pool2d_pass import RewriteMaxPool2dPass  # noqa
+from .rewrite_mxfp_conv2d import RewriteMXFPConv2dPass  # noqa
 from .rewrite_mxfp_linear import RewriteMXFPLinearPass  # noqa
 from .rewrite_pad import RewritePadPass  # noqa
 from .rewrite_slice import RewriteSlicePass  # noqa
 
@@ -117,6 +117,7 @@
     InsertConstShapesPass,
     InsertControlFlowRescalesPass,
     InsertDataLayoutCastsPass,
+    InsertDynamicPaddingPass,
     InsertInt32CastsAfterInt64PlaceholdersPass,
     InsertRescaleInt32Pass,
     InsertRescalePass,
@@ -146,6 +147,7 @@
     RewriteLeLtToGeGtPass,
     RewriteMatmulPass,
     RewriteMaxPool2dPass,
+    RewriteMXFPConv2dPass,
     RewriteMXFPLinearPass,
     RewritePadPass,
     RewriteSlicePass,
@@ -611,6 +613,7 @@ def _tosa_pipeline(
                 RewriteMaxPool2dPass(),
                 DecomposeAdaptiveMaxPool2dPass(),
                 RewriteConvPass(exported_program),
+                RewriteMXFPConv2dPass(exported_program),
                 RewriteMXFPLinearPass(exported_program),
                 RewriteMatmulPass(),
                 RewritePadPass(),
@@ -632,6 +635,7 @@ def _tosa_pipeline(
                 CastInt64BuffersToInt32Pass(exported_program),
                 FuseEqualPlaceholdersPass(exported_program),
                 SymbolicToTosaShapesPass(),
+                InsertDynamicPaddingPass(),
                 FuseConsecutiveConcatShapesPass(),
                 EnsureUniqueOutputNodesPass(),
                 RemoveNoopPass(),
 
@@ -128,3 +128,21 @@ def rewrite_clamp(node: Node, pass_: AtenToDialectPass) -> DialectNodeSpec | Non
         exir_ops.backend.tosa.CLAMP.default,
         (node.args[0], *min_max_args),
     )
+
+
+def get_activation_replacement(
+    node: Node, pass_: AtenToDialectPass
+) -> DialectNodeSpec | None:
+    """Build the TOSA dialect replacement for an activation node.
+
+    Dispatches on ``node.target`` to the matching ``rewrite_*`` builder defined
+    in this module.
+
+    Args:
+        node: Node whose target selects the builder.
+        pass_: Pass instance, forwarded unchanged to the selected builder.
+
+    Returns:
+        Whatever the selected builder returns, or ``None`` when the target is
+        not the edge ATen clamp, erf, sigmoid or tanh default overload.
+
+    """
+    target = node.target
+    aten = exir_ops.edge.aten
+
+    # Guards are checked in the same order as the supported-op list above.
+    if target == aten.clamp.default:
+        return rewrite_clamp(node, pass_)
+    if target == aten.erf.default:
+        return rewrite_erf(node, pass_)
+    if target == aten.sigmoid.default:
+        return rewrite_sigmoid(node, pass_)
+    if target == aten.tanh.default:
+        return rewrite_tanh(node, pass_)
+    return None
@@ -0,0 +1,26 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast
+
+from executorch.backends.transforms.aten_to_dialect_pass import (
+    AtenToDialectPass,
+    DialectNodeSpec,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx import Node
+
+
+def rewrite_argmax(node: Node, pass_: AtenToDialectPass) -> DialectNodeSpec:
+    """Build the TOSA ``ARGMAX`` dialect spec for an argmax node.
+
+    The reduction dim is taken from the ``dim`` keyword when present, otherwise
+    from the second positional argument. A negative dim is wrapped by adding
+    the rank of the input node's ``meta["val"]`` shape.
+
+    Args:
+        node: The argmax node; ``node.args[0]`` is its input node.
+        pass_: Not used by this builder.
+
+    Returns:
+        A spec targeting ``exir_ops.backend.tosa.ARGMAX.default`` with
+        ``(input, dim)`` as positional args and no kwargs.
+
+    """
+    # NOTE(review): a missing or ``None`` dim and any ``keepdim`` argument are
+    # not handled here - presumably filtered or decomposed earlier; confirm.
+    source = cast(Node, node.args[0])
+    if "dim" in node.kwargs:
+        axis = cast(int, node.kwargs["dim"])
+    else:
+        axis = cast(int, node.args[1])
+
+    if axis < 0:
+        rank = len(source.meta["val"].shape)
+        axis += rank
+
+    positional = (source, axis)
+    return DialectNodeSpec(
+        exir_ops.backend.tosa.ARGMAX.default,
+        positional,
+        {},
+    )
@@ -5,37 +5,38 @@
 
 import executorch.backends.arm.tosa.dialect  # noqa: F401
 from executorch.backends.arm._passes.aten_to_tosa_activation_functions import (
-    rewrite_clamp,
-    rewrite_erf,
-    rewrite_sigmoid,
-    rewrite_tanh,
+    get_activation_replacement,
+)
+from executorch.backends.arm._passes.aten_to_tosa_tensor_operators import rewrite_argmax
+from executorch.backends.transforms.aten_to_dialect_pass import (
+    AtenToDialectPass,
+    DialectNodeSpec,
 )
-from executorch.backends.transforms.aten_to_dialect_pass import AtenToDialectPass
 from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx import Node
 
 
 class ExirToTosaPass(AtenToDialectPass):
     """Rewrite simple EXIR ops to equivalent backend TOSA dialect ops.
 
-    Rewrite functions are grouped by op category and registered with the shared
-    ATen-to-dialect pass infrastructure.
+    Rewrite functions are registered with the shared ATen-to-dialect pass
+    infrastructure.
 
     """
 
 
-_ACTIVATION_FUNCTION_REWRITES = {
-    exir_ops.edge.aten.clamp.default: rewrite_clamp,
-    exir_ops.edge.aten.erf.default: rewrite_erf,
-    exir_ops.edge.aten.sigmoid.default: rewrite_sigmoid,
-    exir_ops.edge.aten.tanh.default: rewrite_tanh,
-}
+@ExirToTosaPass.register_dialect_substitution(exir_ops.edge.aten.argmax.default)
+def _get_tensor_operators_replacement(
+    node: Node, pass_: AtenToDialectPass
+) -> DialectNodeSpec:
+    """Return the dialect node spec for an edge ``aten.argmax.default`` node.
+
+    Registered on ``ExirToTosaPass`` for that target; delegates to
+    ``rewrite_argmax`` with the same arguments.
+
+    """
+    return rewrite_argmax(node, pass_)
 
-_DIRECT_REWRITE_CATEGORIES = {
-    "activation_functions": _ACTIVATION_FUNCTION_REWRITES,
-}
 
-# Register each category's ATen targets with the function that builds the
-# corresponding TOSA dialect node spec.
-for _rewrite_category in _DIRECT_REWRITE_CATEGORIES.values():
-    for _edge_target, _rewrite_fn in _rewrite_category.items():
-        ExirToTosaPass.register_dialect_substitution(_edge_target)(_rewrite_fn)
+# NOTE(review): stacking these decorators only works if
+# register_dialect_substitution(...) returns the decorated function - confirm.
+@ExirToTosaPass.register_dialect_substitution(exir_ops.edge.aten.clamp.default)
+@ExirToTosaPass.register_dialect_substitution(exir_ops.edge.aten.erf.default)
+@ExirToTosaPass.register_dialect_substitution(exir_ops.edge.aten.sigmoid.default)
+@ExirToTosaPass.register_dialect_substitution(exir_ops.edge.aten.tanh.default)
+def _get_activation_replacement(
+    node: Node, pass_: AtenToDialectPass
+) -> DialectNodeSpec | None:
+    """Return the dialect node spec for a registered activation node.
+
+    Registered on ``ExirToTosaPass`` for the edge clamp, erf, sigmoid and tanh
+    default overloads; delegates to ``get_activation_replacement`` with the
+    same arguments and returns its result.
+
+    """
+    return get_activation_replacement(node, pass_)
@@ -29,6 +29,7 @@ class InsertDynamicPaddingPass(ArmOpTargetedPass):
     _passes_required_after: Set[Type[ExportPass]] = set()
     target_ops = (
         exir_ops.backend.tosa.CONV2D.default,
+        exir_ops.backend.tosa.CONV3D.default,
         exir_ops.backend.tosa.DEPTHWISE_CONV2D.default,
         exir_ops.backend.tosa.MAX_POOL2D.default,
         exir_ops.backend.tosa.AVG_POOL2D.default,
@@ -57,11 +58,12 @@ def call_operator(self, op, args, kwargs, meta, updated=False) -> ProxyValue:
         if not self._is_dynamic_padding(padding):
             return super().call_operator(op, args, kwargs, meta, updated)
 
-        # Create a pad op before conv2d
+        # Create a pad op before the convolution/pool op.
         input_tensor = args[0]
 
         zero_padding_pair = [0, 0]
-        zero_spatial_padding = [0, 0, 0, 0]
+        spatial_rank = 3 if op == exir_ops.backend.tosa.CONV3D.default else 2
+        zero_spatial_padding = [0] * (spatial_rank * 2)
         N_padding = super().call_shape_operator(
             exir_ops.backend.tosa.CONST_SHAPE.default,
             (zero_padding_pair,),
@@ -93,7 +95,7 @@ def call_operator(self, op, args, kwargs, meta, updated=False) -> ProxyValue:
             meta,
             True,
         )
-        new_conv2d_args = list(args)
-        new_conv2d_args[0] = pad_res
-        new_conv2d_args[padding_index] = zero_spatial_padding
-        return super().call_operator(op, tuple(new_conv2d_args), kwargs, meta, updated)
+        new_args = list(args)
+        new_args[0] = pad_res
+        new_args[padding_index] = zero_spatial_padding
+        return super().call_operator(op, tuple(new_args), kwargs, meta, updated)
@@ -97,23 +97,25 @@ def _adjust_pad_if_needed(
 
         if isinstance(mod_remainder, torch.SymInt):
             shape_env = get_context_shape_env()
-            exact_values = evaluate_symbolic_expr_values(
-                mod_remainder.node.expr, shape_env
-            )
+            exact_values = evaluate_symbolic_expr_values(mod_remainder, shape_env)
             if exact_values is not None:
                 mod_remainder_upper = max(exact_values)
+                if len(exact_values) == 1:
+                    mod_remainder = int(next(iter(exact_values)))
+                elif mod_remainder_upper == 0:
+                    mod_remainder = 0
+                else:
+                    return pad - mod_remainder
             else:
-                value_ranges = shape_env.bound_sympy(mod_remainder.node.expr)
-                mod_remainder_upper = int(value_ranges.upper)
-            if mod_remainder_upper == 0:
-                mod_remainder = 0
-        else:
-            mod_remainder_upper = mod_remainder
-
-        if mod_remainder_upper > pad:
+                # SizeAdjustInputPass already trims symbolic remainder classes
+                # that would force negative padding. Keep the symbolic
+                # expression here instead of asking ShapeEnv to normalize it.
+                return pad - mod_remainder
+        if mod_remainder > pad:
             raise RuntimeError(
-                "This case should be handled by the SizeAdjustInputPass, is it enabled?\n"
+                "This case should be handled by SizeAdjustInputPass, is it enabled?\n"
             )
+
         return pad - mod_remainder
 
     def _is_depthwise_conv2d(self, node: torch.fx.Node) -> bool: