Skip to content

Commit 46b8df1

Browse files
[slimtensor] Add as_strided() view operation for tensor reinterpretation (#16838)
This PR was created by the merge bot to help merge the original PR into the main branch. ghstack PR number: #16443 by @Gasoonjia ^ Please use this as the source of truth for the PR details, comments, and reviews ghstack PR base: https://github.com/pytorch/executorch/tree/gh/gasoonjia/85/base ghstack PR head: https://github.com/pytorch/executorch/tree/gh/gasoonjia/85/head Merge bot PR base: https://github.com/pytorch/executorch/tree/gh/gasoonjia/84/orig Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/gasoonjia/85/orig Differential Revision: [D89951036](https://our.internmc.facebook.com/intern/diff/D89951036/) @diff-train-skip-merge --------- Co-authored-by: gasoonjia <gasoonjia@icloud.com>
1 parent 1336645 commit 46b8df1

7 files changed

Lines changed: 678 additions & 19 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <cstdint>
12+
#include <utility>
13+
14+
#include <executorch/backends/aoti/slim/c10/macros/Macros.h>
15+
#include <executorch/runtime/platform/assert.h>
16+
17+
namespace executorch::backends::aoti::slim::c10 {
18+
19+
namespace detail {
20+
21+
/// Slow path for maybe_wrap_dim when dimension needs validation.
22+
template <typename T>
23+
inline T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar) {
24+
ET_CHECK_MSG(
25+
dim_post_expr >= 0,
26+
"Rank cannot be negative but got %ld",
27+
static_cast<long>(dim_post_expr));
28+
29+
if (dim_post_expr == 0) {
30+
ET_CHECK_MSG(
31+
wrap_scalar,
32+
"Dimension specified as %ld but tensor has no dimensions",
33+
static_cast<long>(dim));
34+
// Recursively call with dim_post_expr=1
35+
if (dim >= 0 && dim < 1) {
36+
return dim;
37+
} else if (dim >= -1 && dim < 0) {
38+
return dim + 1;
39+
}
40+
ET_CHECK_MSG(
41+
false,
42+
"Dimension out of range (expected to be in range of [-1, 0], but got %ld)",
43+
static_cast<long>(dim));
44+
}
45+
46+
T min = dim_post_expr * -1;
47+
T max = dim_post_expr - 1;
48+
ET_CHECK_MSG(
49+
min <= dim && dim <= max,
50+
"Dimension out of range (expected to be in range of [%ld, %ld], but got %ld)",
51+
static_cast<long>(min),
52+
static_cast<long>(max),
53+
static_cast<long>(dim));
54+
55+
// This should be unreachable if above check passes
56+
return dim < 0 ? dim + dim_post_expr : dim;
57+
}
58+
59+
} // namespace detail
60+
61+
/// Wraps a dimension index to handle negative indexing.
62+
/// For example, dim=-1 with dim_post_expr=3 returns 2.
63+
///
64+
/// @param dim The dimension index (may be negative).
65+
/// @param dim_post_expr The number of dimensions.
66+
/// @param wrap_scalar If true, allows wrapping for 0-dimensional tensors.
67+
/// @return The wrapped dimension index (always non-negative).
68+
template <typename T>
69+
inline T _maybe_wrap_dim(T dim, T dim_post_expr, bool wrap_scalar = true) {
70+
// Inline the fast paths
71+
if (SLIMTENSOR_LIKELY(dim_post_expr * -1 <= dim && dim < dim_post_expr)) {
72+
if (dim < 0) {
73+
return dim + dim_post_expr;
74+
}
75+
return dim;
76+
}
77+
// Check edge-cases out-of-line
78+
return detail::maybe_wrap_dim_slow<T>(
79+
std::move(dim), std::move(dim_post_expr), wrap_scalar);
80+
}
81+
82+
/// Wraps a dimension index for int64_t.
83+
inline int64_t
84+
maybe_wrap_dim(int64_t dim, int64_t dim_post_expr, bool wrap_scalar = true) {
85+
return _maybe_wrap_dim(dim, dim_post_expr, wrap_scalar);
86+
}
87+
88+
/// Wraps a dimension index for size_t.
89+
inline int64_t
90+
maybe_wrap_dim(int64_t dim, size_t dim_post_expr, bool wrap_scalar = true) {
91+
return _maybe_wrap_dim(dim, static_cast<int64_t>(dim_post_expr), wrap_scalar);
92+
}
93+
94+
} // namespace executorch::backends::aoti::slim::c10

backends/aoti/slim/c10/core/targets.bzl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,19 @@ def define_common_targets():
6767
],
6868
)
6969

70+
# Header-only library for WrapDimMinimal
71+
runtime.cxx_library(
72+
name = "wrap_dim_minimal",
73+
headers = [
74+
"WrapDimMinimal.h",
75+
],
76+
visibility = ["@EXECUTORCH_CLIENTS"],
77+
exported_deps = [
78+
"//executorch/backends/aoti/slim/c10/macros:macros",
79+
"//executorch/runtime/platform:platform",
80+
],
81+
)
82+
7083
# Combined c10 core library
7184
runtime.cxx_library(
7285
name = "core",
@@ -77,5 +90,6 @@ def define_common_targets():
7790
":device_type",
7891
":scalar_type",
7992
":sizes_and_strides",
93+
":wrap_dim_minimal",
8094
],
8195
)

backends/aoti/slim/core/SlimTensor.h

Lines changed: 106 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@
1010

1111
#include <cstdint>
1212
#include <cstring>
13+
#include <optional>
1314
#include <utility>
1415
#include <vector>
1516

17+
#include <c10/util/safe_numerics.h>
18+
1619
#include <executorch/backends/aoti/slim/c10/core/Contiguity.h>
1720
#include <executorch/backends/aoti/slim/c10/core/Device.h>
1821
#include <executorch/backends/aoti/slim/c10/core/ScalarType.h>
@@ -254,22 +257,113 @@ class SlimTensor {
254257
}
255258

256259
/**
257-
* Set sizes and strides together.
260+
* Set sizes, strides, and storage offset together.
258261
*/
259-
void set_sizes_and_strides(IntArrayRef sizes, IntArrayRef strides) {
262+
void set_sizes_and_strides(
263+
IntArrayRef sizes,
264+
IntArrayRef strides,
265+
std::optional<int64_t> storage_offset = std::nullopt) {
266+
const size_t new_dim = sizes.size();
260267
ET_CHECK_MSG(
261-
sizes.size() == strides.size(),
262-
"sizes (%zu) and strides (%zu) must have the same length",
263-
sizes.size(),
268+
new_dim == strides.size(),
269+
"dimensionality of sizes (%zu) must match dimensionality of strides (%zu)",
270+
new_dim,
264271
strides.size());
265272

266-
sizes_and_strides_.set_sizes(sizes);
267-
sizes_and_strides_.set_strides(strides);
273+
std::vector<int64_t> new_sizes = toVec(sizes);
274+
std::vector<int64_t> new_strides = toVec(strides);
275+
276+
// stride calculation logic
277+
bool overflowed = false;
278+
if (new_dim > 0) {
279+
for (int64_t dim = new_dim - 1; dim >= 0; dim--) {
280+
if (strides[dim] >= 0) {
281+
new_strides[dim] = strides[dim];
282+
} else {
283+
// for negative strides
284+
if (dim == new_dim - 1) {
285+
new_strides[dim] = 1;
286+
} else {
287+
overflowed |= ::c10::mul_overflows(
288+
new_strides[dim + 1],
289+
std::max<int64_t>(new_sizes[dim + 1], 1),
290+
&new_strides[dim]);
291+
}
292+
}
293+
}
294+
}
295+
ET_CHECK_MSG(!overflowed, "Stride calculation overflowed");
296+
297+
sizes_and_strides_.set_sizes(makeArrayRef(new_sizes));
298+
sizes_and_strides_.set_strides(makeArrayRef(new_strides));
299+
if (storage_offset.has_value()) {
300+
storage_offset_ = *storage_offset;
301+
}
268302

269303
refresh_numel();
270304
refresh_contiguous();
271305
}
272306

307+
/**
308+
* Set sizes to a contiguous layout (computes strides automatically).
309+
*/
310+
void set_sizes_contiguous(IntArrayRef sizes) {
311+
std::vector<int64_t> contig_strides = compute_contiguous_strides(sizes);
312+
set_sizes_and_strides(sizes, makeArrayRef(contig_strides));
313+
}
314+
315+
// =========================================================================
316+
// View Operations
317+
// =========================================================================
318+
319+
/**
320+
* Returns a view of the tensor with the specified sizes, strides, and
321+
* storage offset. The returned tensor shares the same underlying storage.
322+
*
323+
* @param sizes The sizes of the view.
324+
* @param strides The strides of the view.
325+
* @param storage_offset Offset into storage in number of elements.
326+
* @return A new SlimTensor that is a view of this tensor.
327+
*/
328+
inline SlimTensor as_strided(
329+
IntArrayRef sizes,
330+
IntArrayRef strides,
331+
int64_t storage_offset) const;
332+
333+
/**
334+
* Overload for initializer lists.
335+
*/
336+
inline SlimTensor as_strided(
337+
std::initializer_list<int64_t> sizes,
338+
std::initializer_list<int64_t> strides,
339+
int64_t storage_offset) const {
340+
return as_strided(
341+
makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
342+
}
343+
344+
/**
345+
* Modifies this tensor in-place to have the specified sizes, strides, and
346+
* storage offset. The underlying storage remains unchanged.
347+
*
348+
* @param sizes The new sizes.
349+
* @param strides The new strides.
350+
* @param storage_offset New offset into storage in number of elements.
351+
* @return Reference to this tensor.
352+
*/
353+
inline SlimTensor&
354+
as_strided_(IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset);
355+
356+
/**
357+
* Overload for initializer lists.
358+
*/
359+
inline SlimTensor& as_strided_(
360+
std::initializer_list<int64_t> sizes,
361+
std::initializer_list<int64_t> strides,
362+
int64_t storage_offset) {
363+
return as_strided_(
364+
makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
365+
}
366+
273367
// =========================================================================
274368
// Copy Operation
275369
// =========================================================================
@@ -278,7 +372,7 @@ class SlimTensor {
278372
* Copy data from another tensor to this tensor.
279373
*
280374
* Both tensors must have the same numel and dtype.
281-
* Supports CPU-to-CPU and cross-device copies (CPU↔CUDA, CUDA↔CUDA).
375+
* Currently only supports CPU-to-CPU copy (contiguous tensors only).
282376
*
283377
* @param other The source tensor to copy from
284378
* @return Reference to this tensor
@@ -371,3 +465,7 @@ class SlimTensor {
371465
};
372466

373467
} // namespace executorch::backends::aoti::slim
468+
469+
// Include view operations implementations (must be after SlimTensor class
470+
// definition)
471+
#include <executorch/backends/aoti/slim/core/SlimTensorView-incl.h>
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <executorch/backends/aoti/slim/c10/core/WrapDimMinimal.h>
12+
#include <executorch/backends/aoti/slim/util/ArrayRefUtil.h>
13+
14+
namespace executorch::backends::aoti::slim {
15+
16+
// Returns a restrided view: copy the tensor handle (storage is shared),
// then restride the copy in place.
inline SlimTensor SlimTensor::as_strided(
    IntArrayRef sizes,
    IntArrayRef strides,
    int64_t storage_offset) const {
  SlimTensor view = *this;
  view.as_strided_(sizes, strides, storage_offset);
  return view;
}
24+
25+
// In-place restride: validates the requested geometry, then delegates to
// set_sizes_and_strides. Storage is never reallocated or copied.
inline SlimTensor& SlimTensor::as_strided_(
    IntArrayRef sizes,
    IntArrayRef strides,
    int64_t storage_offset) {
  const size_t ndim = sizes.size();
  ET_CHECK_MSG(
      ndim == strides.size(),
      "as_strided: number of sizes (%zu) must equal number of strides (%zu)",
      ndim,
      strides.size());

  // Every dimension extent must be non-negative.
  for (size_t i = 0; i < ndim; ++i) {
    ET_CHECK_MSG(
        sizes[i] >= 0,
        "as_strided: size at dimension %zu is negative: %ld",
        i,
        static_cast<long>(sizes[i]));
  }

  // The view must not start before the beginning of storage.
  ET_CHECK_MSG(
      storage_offset >= 0,
      "as_strided: storage_offset must be non-negative, got: %ld",
      static_cast<long>(storage_offset));

  this->set_sizes_and_strides(sizes, strides, storage_offset);
  return *this;
}
51+
52+
} // namespace executorch::backends::aoti::slim

backends/aoti/slim/core/targets.bzl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def define_common_targets():
2626
name = "slimtensor",
2727
headers = [
2828
"SlimTensor.h",
29+
"SlimTensorView-incl.h",
2930
],
3031
visibility = ["@EXECUTORCH_CLIENTS"],
3132
exported_deps = [
@@ -34,9 +35,10 @@ def define_common_targets():
3435
"//executorch/backends/aoti/slim/c10/core:device",
3536
"//executorch/backends/aoti/slim/c10/core:scalar_type",
3637
"//executorch/backends/aoti/slim/c10/core:sizes_and_strides",
38+
"//executorch/backends/aoti/slim/c10/core:wrap_dim_minimal",
3739
"//executorch/backends/aoti/slim/util:array_ref_util",
3840
"//executorch/backends/aoti/slim/util:size_util",
39-
"//executorch/backends/aoti/slim/c10/cuda:exception",
4041
"//executorch/runtime/platform:platform",
42+
"//executorch/backends/aoti/slim/c10/cuda:exception",
4143
],
4244
)

backends/aoti/slim/core/test/targets.bzl

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,17 @@ def get_backend_mode():
77

88
def define_common_targets():
99
"""Define test targets for SlimTensor core module."""
10+
runtime.cxx_test(
11+
name = "test_slimtensor_dtypes",
12+
srcs = [
13+
"test_slimtensor_dtypes.cpp",
14+
],
15+
deps = [
16+
"//executorch/backends/aoti/slim/factory:empty",
17+
],
18+
)
1019

11-
# GPU storage test with CUDA support
20+
# Backend mode specific tests
1221
for backend_mode in get_backend_mode():
1322
backend_suffix = "_" + backend_mode if backend_mode == "cuda" else ""
1423

@@ -57,12 +66,14 @@ def define_common_targets():
5766
**backend_kwargs
5867
)
5968

60-
runtime.cxx_test(
61-
name = "test_slimtensor_dtypes",
62-
srcs = [
63-
"test_slimtensor_dtypes.cpp",
64-
],
65-
deps = [
66-
"//executorch/backends/aoti/slim/factory:empty",
67-
],
68-
)
69+
runtime.cxx_test(
70+
name = "test_as_strided" + backend_suffix,
71+
srcs = [
72+
"test_as_strided.cpp",
73+
],
74+
deps = [
75+
"//executorch/backends/aoti/slim/core:slimtensor",
76+
"//executorch/backends/aoti/slim/factory:empty",
77+
],
78+
**backend_kwargs
79+
)

0 commit comments

Comments
 (0)