robell
diff --git a/‎backends/arm/ethosu/partitioner.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/arm/ethosu/partitioner.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in‎
Lines changed: 5 additions & 5 deletions b/‎backends/arm/scripts/docgen/ethos-u/backends-arm-ethos-u-overview.md.in‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎backends/arm/scripts/generate_grid_sampler_spirv.py‎
Lines changed: 75 additions & 0 deletions b/‎backends/arm/scripts/generate_grid_sampler_spirv.py‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎backends/arm/test/misc/test_custom_shader_payload.py‎
Lines changed: 79 additions & 0 deletions b/‎backends/arm/test/misc/test_custom_shader_payload.py‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎backends/arm/test/misc/test_extract_io_params_tosa.py‎
Lines changed: 25 additions & 0 deletions b/‎backends/arm/test/misc/test_extract_io_params_tosa.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎backends/arm/test/ops/test_grid_sampler.py‎
Lines changed: 62 additions & 0 deletions b/‎backends/arm/test/ops/test_grid_sampler.py‎
Lines changed: 62 additions & 0 deletions
@@ -5,10 +5,10 @@
 
 from typing import final, Optional, Sequence
 
-import torch
 from executorch.backends.arm.ethosu import EthosUBackend, EthosUCompileSpec
 from executorch.backends.arm.tosa.partitioner import TOSAPartitioner
 from executorch.exir.backend.partitioner import DelegationSpec
+from torch._ops import OpOverload
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 
@@ -33,5 +33,5 @@ def __init__(
         )
         self.additional_checks = additional_checks
         self.tosa_spec = compile_spec.tosa_spec
-        self._custom_partition_ops: set[torch._ops.OpOverload] = set()
+        self._custom_partition_ops: set[OpOverload] = set()
         self.intermediate_path = compile_spec._get_intermediate_path()
@@ -4,7 +4,7 @@ The Arm&reg; Ethos&trade;-U backend targets Edge/IoT-type AI use-cases by enabli
 [Arm&reg; Ethos&trade;-U55 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u55), [Arm&reg; Ethos&trade;-U65 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u65), and
 [Arm&reg; Ethos&trade;-U85 NPU](https://www.arm.com/products/silicon-ip-cpu/ethos/ethos-u85), leveraging [TOSA](https://www.mlplatform.org/tosa/) and the
 [ethos-u-vela](https://pypi.org/project/ethos-u-vela/) graph compiler. This document is a technical reference for using the Ethos-U backend, for a top level view with code examples
-please refer to the [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). <!-- @lint-ignore -->
+please refer to the [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md).
 
 ## Features
 
@@ -27,7 +27,7 @@ For the AOT flow, compilation of a model to `.pte` format using the Ethos-U back
 - [TOSA Serialization Library](https://www.mlplatform.org/tosa/software.html) for serializing the Exir IR graph into TOSA IR.
 - [Ethos-U Vela graph compiler](https://pypi.org/project/ethos-u-vela/) for compiling TOSA flatbuffers into an Ethos-U command stream.
 
-And for building and running the example application available in `examples/arm/executor_runner/` through the standalone CMake entry point:
+And for building and running the example application available in `examples/arm/executor_runner/`:
 - [Arm GNU Toolchain](https://developer.arm.com/Tools%20and%20Software/GNU%20Toolchain) for cross compilation.
 - [Arm&reg; Corstone&trade; SSE-300 FVP](https://developer.arm.com/documentation/100966/1128/Arm--Corstone-SSE-300-FVP) for testing on a Arm&reg; Cortex&reg;-M55+Ethos-U55 reference design.
 - [Arm&reg; Corstone&trade; SSE-320 FVP](https://developer.arm.com/documentation/109760/0000/SSE-320-FVP) for testing on a Arm&reg; Cortex&reg;-M85+Ethos-U85 reference design.
@@ -55,7 +55,7 @@ For more information on quantization, see [Quantization](arm-ethos-u-quantizatio
 
 ## Runtime Integration
 
-An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), with a standalone CMake entry point in `examples/arm/executor_runner/standalone`. The steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md). <!-- @lint-ignore -->
+An example runtime application is available in [examples/arm/executor_runner](https://github.com/pytorch/executorch/blob/main/examples/arm/executor_runner/), and the steps required for building and deploying it on an FVP are explained in the previously mentioned [Arm Ethos-U Backend Tutorial](tutorials/ethos-u-getting-started.md).
 The example application is recommended to use for testing basic functionality of your lowered models, as well as a starting point for developing runtime integrations for your own targets.
 For an in-depth explanation of the architecture of the executor_runner and the steps required for doing such an integration, please refer to [Ethos-U porting guide](https://github.com/pytorch/executorch/blob/main/examples/arm/ethos-u-porting-guide.md).
 
@@ -153,7 +153,7 @@ ExecuTorch for the Ethos-U backend, you automatically install the compiler conta
 
 **→{doc}`/backends/arm-ethos-u/arm-ethos-u-troubleshooting` — Troubleshooting and common issues.**
 
-**→{doc}`/backends/arm-ethos-u/tutorials/ethos-u-getting-started` — Getting started tutorial.**
+**→{doc}`/backends/arm-ethos-u/tutorials/arm-ethos-u-tutorials` — Tutorials.**
 
 **→{doc}`/backends/arm-ethos-u/U55_op_support` — Ethos-U55 supported operators.**
 
@@ -168,7 +168,7 @@ ExecuTorch for the Ethos-U backend, you automatically install the compiler conta
 arm-ethos-u-partitioner
 arm-ethos-u-quantization
 arm-ethos-u-troubleshooting
-tutorials/ethos-u-getting-started
+tutorials/arm-ethos-u-tutorials
 U55_op_support
 U85_op_support
 ```
@@ -0,0 +1,75 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import base64
+import shutil
+import subprocess  # nosec B404 - required to invoke the shader compiler.
+import tempfile
+from pathlib import Path
+
+
+# Shader directory, located at "vgf/shaders" under the grandparent directory
+# of this (resolved) script file.
+SHADER_DIR = Path(__file__).resolve().parents[1] / "vgf" / "shaders"
+# Default GLSL input used when --source is not given.
+DEFAULT_SOURCE = SHADER_DIR / "grid_sampler.glsl"
+# Default base64 SPIR-V output used when --output is not given.
+DEFAULT_OUTPUT = SHADER_DIR / "grid_sampler.spirv.b64"
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Compile the VGF grid_sampler GLSL shader to SPIR-V and write the "
+            "base64-encoded payload consumed by the ExecuTorch custom-shader "
+            "lowering."
+        )
+    )
+    parser.add_argument(
+        "--source",
+        type=Path,
+        default=DEFAULT_SOURCE,
+        help=f"GLSL source file. Defaults to {DEFAULT_SOURCE}",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=DEFAULT_OUTPUT,
+        help=f"Base64 SPIR-V output file. Defaults to {DEFAULT_OUTPUT}",
+    )
+    parser.add_argument(
+        "--glslc",
+        default="glslc",
+        help="Path to glslc. Defaults to resolving glslc from PATH.",
+    )
+    return parser.parse_args()
+
+
+def _resolve_glslc(glslc: str) -> str:
+    resolved = shutil.which(glslc)
+    if resolved is None:
+        raise RuntimeError(
+            f"Could not find {glslc}. Install the Vulkan SDK or pass --glslc."
+        )
+    return resolved
+
+
+def _write_base64_spirv(spirv_path: Path, output_path: Path) -> None:
+    encoded = base64.b64encode(spirv_path.read_bytes()).decode("ascii")
+    output_path.write_text(encoded + "\n", encoding="utf-8")
+
+
+def main() -> None:
+    args = _parse_args()
+    glslc = _resolve_glslc(args.glslc)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        spirv_path = Path(tmpdir) / "grid_sampler.spirv"
+        subprocess.run(  # nosec B603 - glslc path is resolved explicitly.
+            [glslc, str(args.source), "-o", str(spirv_path)],
+            check=True,
+        )
+        _write_base64_spirv(spirv_path, args.output)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,79 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import base64
+
+import pytest
+from executorch.backends.arm.vgf.shaders.grid_sampler import (
+    build_grid_sampler_2d_payload,
+    decode_payload,
+    encode_payload,
+    GRID_SAMPLER_2D_SHADER_BINARY,
+    GRID_SAMPLER_2D_SHADER_ENTRY_POINT,
+    GRID_SAMPLER_2D_SHADER_LANGUAGE,
+    GRID_SAMPLER_2D_SHADER_SOURCE,
+    GRID_SAMPLER_2D_VK_FORMAT,
+    GRID_SAMPLER_2D_WORKGROUP_SIZES,
+)
+
+
+def test_grid_sampler_2d_custom_shader_payload_no_target_round_trip():
+    payload = build_grid_sampler_2d_payload(
+        interpolation_mode=0,
+        padding_mode=2,
+        align_corners=True,
+    )
+    decoded = decode_payload(encode_payload(payload))
+
+    assert decoded["entry_point"] == GRID_SAMPLER_2D_SHADER_ENTRY_POINT
+    assert decoded["workgroup_sizes"] == GRID_SAMPLER_2D_WORKGROUP_SIZES
+    assert decoded["shader_language"] == GRID_SAMPLER_2D_SHADER_LANGUAGE
+    assert base64.b64decode(decoded["shader_code"])[:4] == b"\x03\x02\x23\x07"
+    assert decoded["input_0_type"] == "Tensor"
+    assert decoded["input_0_vkformat"] == GRID_SAMPLER_2D_VK_FORMAT
+    assert decoded["input_0_vkdescriptortype"] == "VK_DESCRIPTOR_TYPE_STORAGE_BUFFER"
+    assert decoded["input_0_binding"] == 0
+    assert decoded["input_1_type"] == "Tensor"
+    assert decoded["input_1_vkformat"] == GRID_SAMPLER_2D_VK_FORMAT
+    assert decoded["input_1_vkdescriptortype"] == "VK_DESCRIPTOR_TYPE_STORAGE_BUFFER"
+    assert decoded["input_1_binding"] == 1
+    assert decoded["output_0_type"] == "Tensor"
+    assert decoded["output_0_vkformat"] == GRID_SAMPLER_2D_VK_FORMAT
+    assert decoded["output_0_vkdescriptortype"] == "VK_DESCRIPTOR_TYPE_STORAGE_BUFFER"
+    assert decoded["output_0_binding"] == 2
+
+
+def test_grid_sampler_2d_custom_shader_payload_no_target_uses_spirv():
+    payload = build_grid_sampler_2d_payload(
+        interpolation_mode=0,
+        padding_mode=0,
+        align_corners=False,
+    )
+
+    shader_binary = base64.b64decode(payload["shader_code"])
+
+    assert payload["shader_language"] == "SPIR-V"
+    assert shader_binary[:4] == b"\x03\x02\x23\x07"
+
+
+def test_grid_sampler_2d_custom_shader_payload_no_target_has_shader_resources():
+    assert GRID_SAMPLER_2D_SHADER_SOURCE == "grid_sampler.glsl"
+    assert GRID_SAMPLER_2D_SHADER_BINARY == "grid_sampler.spirv.b64"
+
+
+def test_grid_sampler_2d_custom_shader_payload_no_target_rejects_bad_modes():
+    with pytest.raises(ValueError, match="Unsupported interpolation_mode"):
+        build_grid_sampler_2d_payload(
+            interpolation_mode=99,
+            padding_mode=0,
+            align_corners=False,
+        )
+
+    with pytest.raises(ValueError, match="Unsupported padding_mode"):
+        build_grid_sampler_2d_payload(
+            interpolation_mode=0,
+            padding_mode=99,
+            align_corners=False,
+        )
@@ -7,6 +7,7 @@
 
 import pytest
 import torch
+from executorch.backends.arm.ethosu import EthosUCompileSpec, EthosUPartitioner
 from executorch.backends.arm.quantizer import VgfQuantizer
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_quantization_config,
@@ -18,6 +19,7 @@
 from executorch.backends.arm.tosa.partitioner import TOSAPartitioner
 from executorch.backends.arm.vgf import VgfCompileSpec, VgfPartitioner
 from executorch.exir import to_edge_transform_and_lower
+from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.passes.quantize_io_pass import extract_io_quant_params
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -88,3 +90,26 @@ def test_roundtrip_extracts_io_params_tosa_INT(
     assert isinstance(out_name, str)
     assert isinstance(out_params["scale"], float)
     assert isinstance(out_params["zero_point"], int)
+
+
+def test_only_vgf_partitioner_registers_grid_sampler_no_target_custom_partition_op():
+    tosa_partitioner = TOSAPartitioner(TosaCompileSpec("TOSA-1.0+FP"))
+    vgf_partitioner = VgfPartitioner(VgfCompileSpec("TOSA-1.0+FP"))
+    ethosu_partitioner = EthosUPartitioner(EthosUCompileSpec("ethos-u55-128"))
+
+    assert hasattr(tosa_partitioner, "_custom_partition_ops")
+    assert hasattr(vgf_partitioner, "_custom_partition_ops")
+    assert hasattr(ethosu_partitioner, "_custom_partition_ops")
+
+    assert (
+        exir_ops.edge.aten.grid_sampler_2d.default
+        not in tosa_partitioner._custom_partition_ops
+    )
+    assert (
+        exir_ops.edge.aten.grid_sampler_2d.default
+        in vgf_partitioner._custom_partition_ops
+    )
+    assert (
+        exir_ops.edge.aten.grid_sampler_2d.default
+        not in ethosu_partitioner._custom_partition_ops
+    )
@@ -0,0 +1,62 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+import torch.nn.functional as F
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import VgfPipeline
+
+# Type of one test input: a pair of tensors, passed as (x, grid) to the module.
+input_t = Tuple[torch.Tensor, torch.Tensor]
+# Operator identifiers handed to VgfPipeline by the test below.
+aten_op = "torch.ops.aten.grid_sampler.default"
+exir_op = "executorch_exir_dialects_edge__ops_aten_grid_sampler_2d_default"
+
+# Test inputs keyed by case name; each value is a callable so the random
+# tensors are only created when the test calls it.
+test_data_suite = {
+    "2d_bilinear_zeros": lambda: (
+        torch.randn(1, 3, 8, 8),
+        torch.randn(1, 4, 4, 2),
+    ),
+}
+
+# Cases passed as ``xfails`` to common.parametrize: a reason string and the
+# exception type given for the case.
+xfails = {
+    "2d_bilinear_zeros": (
+        "CI model_converter does not yet include Vulkan custom-shader "
+        "tosa.custom legalization",
+        RuntimeError,
+    ),
+}
+
+
+class GridSampler2d(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.interpolation_mode_ = 0
+        self.padding_mode_ = 0
+        self.align_corners_ = False
+
+    def forward(self, x, grid):
+        return F.grid_sample(
+            x,
+            grid,
+            mode="bilinear" if self.interpolation_mode_ == 0 else "nearest",
+            padding_mode="zeros" if self.padding_mode_ == 0 else "border",
+            align_corners=self.align_corners_,
+        )
+
+
+@common.parametrize("test_data", test_data_suite, xfails=xfails, strict=False)
+@common.SkipIfNoModelConverter
+def test_grid_sampler_vgf_no_quant(test_data):
+    test_data = test_data()
+    pipeline = VgfPipeline[input_t](
+        GridSampler2d(),
+        test_data,
+        aten_op,
+        exir_op,
+        quantize=False,
+        run_on_vulkan_runtime=False,
+    )
+    pipeline.run()