psiddh
diff --git a/‎.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/benchmark_tooling/get_benchmark_analysis_data.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_lora.sh‎
Lines changed: 21 additions & 15 deletions b/‎.ci/scripts/test_lora.sh‎
Lines changed: 21 additions & 15 deletions
diff --git a/‎.ci/scripts/tests/test_get_benchmark_analysis_data.py‎
Lines changed: 6 additions & 2 deletions b/‎.ci/scripts/tests/test_get_benchmark_analysis_data.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎.github/workflows/doc-build.yml‎
Lines changed: 4 additions & 2 deletions b/‎.github/workflows/doc-build.yml‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 2 deletions b/‎CONTRIBUTING.md‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 2 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decompose_as_strided_copy_pass.py‎
Lines changed: 113 additions & 0 deletions b/‎backends/arm/_passes/decompose_as_strided_copy_pass.py‎
Lines changed: 113 additions & 0 deletions
diff --git a/‎backends/arm/common/as_strided_utils.py‎
Lines changed: 70 additions & 0 deletions b/‎backends/arm/common/as_strided_utils.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operator_support/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -104,7 +104,7 @@ class BenchmarkFilters:
 
 BASE_URLS = {
     "local": "http://localhost:3000",
-    "prod": "https://hud.pytorch.org",
+    "prod": "https://hud.pytorch.org",  # @lint-ignore
 }
 
 
 
@@ -117,18 +117,18 @@ else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT}"
   echo "Test 2: Failure; results not the same"
-#   cleanup_files
+  cleanup_files
   exit 1
 fi
 
 # Confirm file sizes.
 FOUNDATION_SIZE=$(stat -c%s qwen_foundation.ptd)
 if [[ $FOUNDATION_SIZE -le "2400000000" ]]; then
-    echo "qwen_foundation_q.ptd size is: $FOUNDATION_SIZE"
+  echo "qwen_foundation_q.ptd size is: $FOUNDATION_SIZE"
 else
-    echo "qwen_foundation_q.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
-    cleanup_files
-    exit 1
+  echo "qwen_foundation_q.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
+  cleanup_files
+  exit 1
 fi
 
 ### QUANTIZATION & PROGRAM DATA SEPARATION ###
@@ -161,13 +161,19 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
     +quantization.qmode="8da4w" \
     +quantization.group_size=32
 
-# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same.
-if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
-    echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
+# Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same size.
+# TODO(lfq): confirm they are the same (deserialize and check fields)
+size1=$(stat -c%s qwen_foundation_lora_q.ptd)
+size2=$(stat -c%s qwen_foundation_q.ptd)
+
+if [ "$size1" -eq "$size2" ]; then
+  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same size: $size1."
 else
-    echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
-    cleanup_files
-    exit 1
+  echo "qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd have different sizes."
+  ls -la qwen_foundation_lora_q.ptd
+  ls -la qwen_foundation_q.ptd
+  cleanup_files
+  exit 1
 fi
 
 # Run quantized qwen model (no adapter).
@@ -214,11 +220,11 @@ fi
 # Confirm qwen_foundation_q.ptd file size.
 FOUNDATION_Q_SIZE=$(stat -c%s qwen_foundation_q.ptd)
 if [[ $FOUNDATION_Q_SIZE -le "1000000000" ]]; then
-    echo "qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE"
+  echo "qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE"
 else
-    echo "qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
-    cleanup_files
-    exit 1
+  echo "qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
+  cleanup_files
+  exit 1
 fi
 
 cleanup_files
@@ -179,7 +179,9 @@ def setUp(self):
     def test_init(self):
         """Test initialization of ExecutorchBenchmarkFetcher."""
         self.assertEqual(self.fetcher.env, "prod")
-        self.assertEqual(self.fetcher.base_url, "https://hud.pytorch.org")
+        self.assertEqual(
+            self.fetcher.base_url, "https://hud.pytorch.org"  # @lint-ignore
+        )
         self.assertEqual(
             self.fetcher.query_group_table_by_fields,
             ["model", "backend", "device", "arch"],
@@ -193,7 +195,9 @@ def test_init(self):
 
     def test_get_base_url(self):
         """Test _get_base_url method."""
-        self.assertEqual(self.fetcher._get_base_url(), "https://hud.pytorch.org")
+        self.assertEqual(
+            self.fetcher._get_base_url(), "https://hud.pytorch.org"  # @lint-ignore
+        )
 
         # Test with local environment
         local_fetcher = self.module.ExecutorchBenchmarkFetcher(env="local")
 
@@ -117,10 +117,12 @@ jobs:
         fi
         echo "Target Folder: ${TARGET_FOLDER}"
 
-        mkdir -p "${TARGET_FOLDER}"
         # Clean up target folder if exists and copy html output to the
         # Target folder
-        rm -rf "${TARGET_FOLDER}"/*
+        if [ -d "${TARGET_FOLDER}" ]; then
+          rm -rf "${TARGET_FOLDER}"
+        fi
+        mkdir -p "${TARGET_FOLDER}"
         mv "${RUNNER_ARTIFACT_DIR}"/html/* "${TARGET_FOLDER}"
         git add "${TARGET_FOLDER}" || true
 
 
@@ -26,7 +26,7 @@ executorch
 │   ├── <a href="backends/apple">apple</a> - Apple-specific backends.
 │   │   ├── <a href="backends/apple/coreml">coreml</a> - CoreML backend for Apple devices. See <a href="docs/source/backends/coreml/coreml-overview.md">doc</a>.
 │   │   └── <a href="backends/apple/mps">mps</a> - Metal Performance Shaders backend for Apple devices. See <a href="docs/source/backends/mps/mps-overview.md">doc</a>.
-│   ├── <a href="backends/arm">arm</a> - ARM architecture backends. See <a href="docs/source/backends-arm-ethos-u.md">doc</a>.
+│   ├── <a href="backends/arm">arm</a> - ARM architecture backends. See <a href="docs/source/backends/arm-ethos-u/arm-ethos-u-overview.md">doc</a>.
 │   ├── <a href="backends/cadence">cadence</a> - Cadence-specific backends. See <a href="docs/source/backends-cadence.md">doc</a>.
 │   ├── <a href="backends/example">example</a> - Example backend implementations.
 │   ├── <a href="backends/mediatek">mediatek</a> - MediaTek-specific backends. See <a href="docs/source/backends-mediatek.md">doc</a>.
@@ -324,7 +324,8 @@ the code you're modifying and find an author who has more context. Ask them
 for their help in the PR comments.
 
 ### Continuous Integration
-See https://hud.pytorch.org/hud/pytorch/executorch/main for the current state of
+
+See https://hud.pytorch.org/hud/pytorch/executorch/main for the current state of <!-- @lint-ignore -->
 the CI (continuous integration) jobs. If `main` is broken, consider rebasing
 your PR onto the `viable/strict` branch, which points to the most recent
 all-green commit.
 
@@ -31,6 +31,7 @@
 from .decompose_add_sub_alpha_pass import DecomposeAddSubAlphaPass  # noqa
 from .decompose_addmm_pass import DecomposeAddmmPass  # noqa
 from .decompose_any_pass import DecomposeAnyPass  # noqa
+from .decompose_as_strided_copy_pass import DecomposeAsStridedCopyPass  # noqa
 from .decompose_asin_and_acos_pass import DecomposeAsinAndAcosPass  # noqa
 from .decompose_asinh_pass import DecomposeAsinhPass  # noqa
 from .decompose_atan_pass import DecomposeAtanPass  # noqa
 
@@ -38,6 +38,7 @@
     DecomposeAnyPass,
     DecomposeAsinAndAcosPass,
     DecomposeAsinhPass,
+    DecomposeAsStridedCopyPass,
     DecomposeAtanhPass,
     DecomposeAtanPass,
     DecomposeAvgPool2dPass,
@@ -333,6 +334,7 @@ def _tosa_pipeline(
                 ConvertExpandCopyToRepeatPass(),
                 UnsqueezeBeforeRepeatPass(),
                 DecomposeCumsumPass(exported_program),
+                DecomposeAsStridedCopyPass(),
                 DecomposeMaxPool2dPass(),
                 SizeAdjustInputPass(),
                 DecomposeSelectPass(),
 
@@ -0,0 +1,113 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, Optional, Set, Tuple, Type
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm.common.as_strided_utils import (
+    contiguous_strides,
+    maybe_static_sequence,
+    to_int,
+    to_int_tuple,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+class DecomposeAsStridedCopyPass(ArmPass):
+    """
+    Replace contiguous `aten.as_strided_copy` with `aten.view_copy`.
+
+    The TOSA backend only supports the contiguous-as-strided case where the stride matches
+    row-major layout and the storage offset is zero. In that scenario the operator is
+    equivalent to a reshape with copy semantics and can be lowered via `view_copy`.
+    """
+
+    _passes_required_after: Set[Type[ExportPass]] = set()
+
+    _EDGE_OPS = (exir_ops.edge.aten.as_strided_copy.default,)
+    _ATEN_OPS = (torch.ops.aten.as_strided_copy.default,)
+
+    def _extract_args(
+        self, args: Tuple[object, ...], kwargs: dict
+    ) -> Optional[Tuple[Tuple[int, ...], Tuple[int, ...], int]]:
+        """Return (size, stride, storage_offset) when they are statically known."""
+        if len(args) < 3:
+            return None
+
+        size_arg = args[1]
+        stride_arg = args[2]
+        offset_arg = (
+            kwargs.get("storage_offset") if "storage_offset" in kwargs else None
+        )
+        if offset_arg is None and len(args) > 3:
+            offset_arg = args[3]
+
+        size_seq = maybe_static_sequence(size_arg)
+        stride_seq = maybe_static_sequence(stride_arg)
+        if size_seq is None or stride_seq is None:
+            return None
+
+        size_tuple = to_int_tuple(size_seq)
+        stride_tuple = to_int_tuple(stride_seq)
+        if size_tuple is None or stride_tuple is None:
+            return None
+
+        if len(size_tuple) != len(stride_tuple):
+            return None
+
+        if any(stride < 0 for stride in stride_tuple):
+            return None
+
+        if offset_arg is None:
+            storage_offset = 0
+        else:
+            parsed_offset = to_int(offset_arg)
+            if parsed_offset is None:
+                return None
+            storage_offset = parsed_offset
+
+        return size_tuple, stride_tuple, storage_offset
+
+    def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
+        if op not in (*self._EDGE_OPS, *self._ATEN_OPS):
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        extracted = self._extract_args(args, kwargs)
+        if extracted is None:
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        size_tuple, stride_tuple, storage_offset = extracted
+        if storage_offset != 0:
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        expected_strides = contiguous_strides(size_tuple)
+
+        def _stride_matches(idx: int, dim: int) -> bool:
+            stride = stride_tuple[idx]
+            expected = expected_strides[idx]
+            if idx == len(size_tuple) - 1:
+                return stride >= expected
+            if dim == 1 or expected == 0:
+                return True
+            return stride == expected
+
+        if any(not _stride_matches(i, dim) for i, dim in enumerate(size_tuple)):
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        view_args = (args[0], tuple(size_tuple))
+        view_kwargs: Dict[str, object] = {}
+
+        view_op = (
+            exir_ops.edge.aten.view_copy.default
+            if op in self._EDGE_OPS
+            else torch.ops.aten.view_copy.default
+        )
+
+        return super().call_operator(
+            view_op, view_args, view_kwargs, meta, updated=True
+        )
@@ -0,0 +1,70 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Utility helpers shared across as_strided_copy handling."""
+
+from __future__ import annotations
+
+import numbers
+
+from collections.abc import Sequence
+from typing import Optional, Tuple, TypeVar
+
+import torch
+import torch.fx as fx
+
+T = TypeVar("T", bound=Sequence)
+
+
+def to_int(value: object) -> Optional[int]:
+    """Return an int for supported numeric types, otherwise None."""
+    if isinstance(value, (numbers.Integral, torch.SymInt)):
+        return int(value)
+    return None
+
+
+def maybe_static_sequence(value: object) -> Optional[Sequence]:
+    """
+    Return a Python sequence for literal or FX-constant values.
+
+    FX exporters often wrap constant lists in nodes where the materialised
+    value is stored in ``node.meta["val"]``. This helper unwraps that so the
+    rest of the logic can treat them uniformly.
+    """
+    if isinstance(value, (str, bytes)):
+        return None
+    if isinstance(value, fx.Node):
+        const_val = value.meta.get("val")
+        if isinstance(const_val, Sequence):
+            return const_val
+        return None
+    if isinstance(value, Sequence):
+        return value
+    return None
+
+
+def to_int_tuple(value: object) -> Optional[Tuple[int, ...]]:
+    """Best-effort conversion of a sequence of integers/SymInts to a tuple[int, ...]."""
+    seq = maybe_static_sequence(value)
+    if seq is None:
+        return None
+
+    result: list[int] = []
+    for item in seq:
+        converted = to_int(item)
+        if converted is None:
+            return None
+        result.append(converted)
+    return tuple(result)
+
+
+def contiguous_strides(shape: Sequence[int]) -> Tuple[int, ...]:
+    """Compute row-major contiguous strides for the provided shape."""
+    strides = [0] * len(shape)
+    running = 1
+    for idx in reversed(range(len(shape))):
+        dim_val = shape[idx]
+        strides[idx] = running if dim_val != 0 else 1
+        running *= max(dim_val, 1)
+    return tuple(strides)
@@ -5,6 +5,7 @@
 
 
 from . import (  # noqa
+    as_strided_copy_support,
     clone_dim_order_support,
     control_flow_support,
     convolution_support,
Original file line number	Diff line number	Diff line change
`@@ -104,7 +104,7 @@ class BenchmarkFilters:`
`104`	`104`
`105`	`105`	`BASE_URLS = {`
`106`	`106`	`"local": "http://localhost:3000",`
`107`		`- "prod": "https://hud.pytorch.org",`
	`107`	`+ "prod": "https://hud.pytorch.org", # @lint-ignore`
`108`	`108`	`}`
`109`	`109`
`110`	`110`