pytorch
diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml
Lines changed: 8 additions & 7 deletions
diff --git a/.github/workflows/android-release-on-tag.yml b/.github/workflows/android-release-on-tag.yml
Lines changed: 4 additions & 0 deletions
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 2 additions & 0 deletions
diff --git a/backends/arm/_passes/decompose_einsum_pass.py b/backends/arm/_passes/decompose_einsum_pass.py
Lines changed: 66 additions & 0 deletions
diff --git a/backends/arm/_passes/get_decomposition_pass.py b/backends/arm/_passes/get_decomposition_pass.py
Lines changed: 34 additions & 11 deletions
@@ -145,9 +145,9 @@ jobs:
         export BUILD_AAR_DIR=aar-out
         bash scripts/build_android_library.sh
         mkdir -p "${ARTIFACTS_DIR_NAME}"
-        cp aar-out/executorch.aar "${ARTIFACTS_DIR_NAME}/executorch.aar"
+        cp aar-out/executorch.aar "${ARTIFACTS_DIR_NAME}/executorch-${FLAVOR}.aar"
 
-        shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch.aar"
+        shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch-${FLAVOR}.aar"
 
         # Publish to maven staging
         UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
@@ -172,11 +172,6 @@ jobs:
       - name: Upload AAR RC to AWS S3
         shell: bash
         run: |
-          wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/executorch.aar
-          shasum -a 256 executorch.aar > executorch.aar.sha256sums
-
-          pip install awscli==1.32.18
-          AWS_CMD="aws s3 cp"
           VERSION="${{ inputs.version }}"
           FLAVOR="${{ inputs.flavor }}"
           if [ -z "$VERSION" ]; then
@@ -185,5 +180,11 @@ jobs:
           if [ -z "$FLAVOR" ]; then
             FLAVOR="xnnpack"
           fi
+          wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/executorch-${FLAVOR}.aar
+          mv executorch-${FLAVOR}.aar executorch.aar
+          shasum -a 256 executorch.aar > executorch.aar.sha256sums
+
+          pip install awscli==1.32.18
+          AWS_CMD="aws s3 cp"
           ${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar --acl public-read
           ${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar.sha256sums --acl public-read
@@ -6,6 +6,10 @@ on:
       - 'v*.*.*-rc*'
       - 'v*.*.*'
 
+permissions:
+  id-token: write
+  contents: read
+
 jobs:
   prepare:
     runs-on: ubuntu-latest
 
@@ -43,6 +43,7 @@
 from .decompose_cumsum_pass import DecomposeCumsumPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
 from .decompose_div_tensor_mode import DecomposeDivTensorModePass  # noqa
+from .decompose_einsum_pass import DecomposeEinsumPass  # noqa
 from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa  # noqa
 from .decompose_erfinv_pass import DecomposeErfinvPass  # noqa
 
@@ -51,6 +51,7 @@
     DecomposeCumsumPass,
     DecomposeDivPass,
     DecomposeDivTensorModePass,
+    DecomposeEinsumPass,
     DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeErfinvPass,
@@ -560,6 +561,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
                 DecomposeFloorDividePass(tfa_pass=True),
                 DecomposeDivTensorModePass(tfa_pass=True),
                 DecomposeWhereScalarOtherPass(tfa_pass=True),
+                DecomposeEinsumPass(tfa_pass=True),
                 RewriteInplaceArithmeticPass(tfa_pass=True),
                 DecomposeAddSubAlphaPass(tfa_pass=True),
                 DecomposeLeakyReLUPass(tfa_pass=True),
 
@@ -0,0 +1,66 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.get_decomposition_pass import GetDecompositionPass
+
+
+class DecomposeEinsumPass(GetDecompositionPass):
+    """Decomposes aten.einsum.default into more primitive ops.
+
+    This pass is intended to be called in transform_for_annotation to prepare
+    the graph for quantization. Einsum is not annotated directly by the Arm
+    quantizer, but the decomposed ops are.
+
+    """
+
+    targeted_ops = [torch.ops.aten.einsum.default]
+
+    def _get_input_tensors(self, node: torch.fx.Node) -> list:
+        """Override the base hook because aten.einsum.default takes (equation,
+        [operands]), which cannot be handled by the generic one-arg-per-input
+        logic.
+        """
+        equation, operands = node.args  # type: ignore[union-attr]
+        fake_operands = [operand.meta["val"] for operand in operands]  # type: ignore[union-attr]
+        return [equation, fake_operands]
+
+    def _get_placeholder_map(
+        self,
+        node: torch.fx.Node,
+        decomposed_module: torch.fx.GraphModule,
+    ) -> dict[str, torch.fx.Node]:
+        """Map traced operand placeholders back to the original FX nodes.
+
+        Einsum does not trace placeholders one-to-one with node.args: the
+        traced graph has arg0_1 for the equation string, which is skipped,
+        and arg1_i (1-based) for each tensor inside the operand list.
+
+        Raises:
+            RuntimeError: for any other placeholder name, or an index i that
+                is not a position in the operand list.
+        """
+        _, operands = node.args
+        name_to_input_tensor_map = {}
+        for decomposed_node in decomposed_module.graph.nodes:
+            name = decomposed_node.name
+            if decomposed_node.op != "placeholder" or name == "arg0_1":
+                continue
+            suffix = name.split("_")[1] if name.startswith("arg1_") else ""
+            # Reject bad indices up front: "arg1_0" would otherwise wrap to
+            # operands[-1] and silently rewire the wrong input.
+            if not suffix.isdecimal() or not (
+                1 <= int(suffix) <= len(operands)  # type: ignore[arg-type]
+            ):
+                raise RuntimeError(f"Unexpected einsum placeholder name {name!r}.")
+            name_to_input_tensor_map[name] = operands[int(suffix) - 1]  # type: ignore[index]
+
+        return name_to_input_tensor_map  # type: ignore[return-value]
+
+    def _get_output_node(self, output_node: torch.fx.Node) -> torch.fx.Node:
+        """Return the traced value node for einsum graphs that emit
+        output([node]).
+        """
+        return output_node.args[0][0]  # type: ignore[index, return-value]
@@ -34,6 +34,31 @@ def __init__(self, tfa_pass=False, *args, **kwargs):
     def _skip_pass(self, input_tensors: list) -> bool:
         return False
 
+    def _get_input_tensors(self, node: torch.fx.Node) -> list:
+        """Return meta["val"] of node-like args plus plain int args, in order."""
+        # Args that neither carry ``meta`` nor are ints are left out.
+        return [
+            argument.meta["val"] if hasattr(argument, "meta") else argument  # type: ignore[union-attr]
+            for argument in node.args
+            if hasattr(argument, "meta") or isinstance(argument, int)
+        ]
+
+    def _get_placeholder_map(
+        self,
+        node: torch.fx.Node,
+        decomposed_module: torch.fx.GraphModule,
+    ) -> dict[str, torch.fx.Node]:
+        """Map placeholder name "arg{i}_1" to the i-th entry of node.args."""
+        # decomposed_module is unused here; it stays in the hook signature so
+        # subclasses can inspect traced placeholder structure when the mapping
+        # is not one-to-one.
+        placeholder_map = {f"arg{idx}_1": arg for idx, arg in enumerate(node.args)}
+        return placeholder_map  # type: ignore[return-value]
+
+    def _get_output_node(self, output_node: torch.fx.Node) -> torch.fx.Node:
+        """Return args[0] of the traced output node; override for other layouts."""
+        return output_node.args[0]  # type: ignore[return-value]
+
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:  # noqa: C901
         modified = False
         for node in graph_module.graph.nodes:
@@ -44,13 +69,7 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:  # noqa: C901
             ):
                 continue
 
-            input_tensors = []
-            for arg in node.args:
-                if hasattr(arg, "meta"):
-                    input_tensors.append(arg.meta["val"])
-
-                elif isinstance(arg, int):
-                    input_tensors.append(arg)
+            input_tensors = self._get_input_tensors(node)
 
             if self._skip_pass(input_tensors):
                 continue
@@ -70,22 +89,26 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:  # noqa: C901
             )(*input_tensors)
 
             with graph_module.graph.inserting_before(node):
-                name_to_input_tensor_map = {}
-                for i, arg in enumerate(node.args):
-                    name_to_input_tensor_map[f"arg{i}_1"] = arg
+                name_to_input_tensor_map = self._get_placeholder_map(
+                    node, decomposed_module
+                )
 
                 decomposed_node_to_subgraph_node = {}
                 last_decomposed_node = None
                 # Create a mapping from input nodes in decomposed module to original nodes.
                 # In decomposed module, there are only input tensors for placeholder op.
                 for decomposed_node in decomposed_module.graph.nodes:
                     if decomposed_node.op == "placeholder":
+                        # Some ops, such as einsum, trace extra placeholders that do
+                        # not map back to original graph tensor inputs.
+                        if decomposed_node.name not in name_to_input_tensor_map:
+                            continue
                         decomposed_node_to_subgraph_node[decomposed_node] = (
                             name_to_input_tensor_map[decomposed_node.name]
                         )
 
                     if decomposed_node.op == "output":
-                        last_decomposed_node = decomposed_node.args[0]
+                        last_decomposed_node = self._get_output_node(decomposed_node)
 
                 # Copy node from decompose graph module
                 for decomposed_node in decomposed_module.graph.nodes: