Skip to content

Commit 8fc7355

Browse files
committed
Fix size/stride vectors going out of scope by extracting a shared SlimTensor-from-blob helper
1 parent 1270870 commit 8fc7355

2 files changed

Lines changed: 44 additions & 20 deletions

File tree

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -691,13 +691,8 @@ class ET_EXPERIMENTAL CudaBackend final
691691
handle->cuda_graph_state.static_input_ptrs.push_back(static_ptr);
692692
handle->cuda_graph_state.static_input_nbytes.push_back(nbytes);
693693

694-
gpu_inputs[i] = new SlimTensor(slim::from_blob(
695-
static_ptr,
696-
slim::makeArrayRef(sizes_vec),
697-
slim::makeArrayRef(strides_vec),
698-
static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
699-
DEFAULT_CUDA_DEVICE,
700-
0));
694+
gpu_inputs[i] = make_slimtensor_from_blob_with_etensor_metadata(
695+
static_ptr, cpu_tensor);
701696
continue;
702697
}
703698

@@ -709,19 +704,8 @@ class ET_EXPERIMENTAL CudaBackend final
709704
cudaError_t err = cudaPointerGetAttributes(&attributes, data_ptr);
710705
if (err == cudaSuccess && attributes.type == cudaMemoryTypeDevice) {
711706
// Data is already on GPU - wrap it directly without copy
712-
auto sizes = cpu_tensor->sizes();
713-
auto strides = cpu_tensor->strides();
714-
std::vector<int64_t> sizes_vec(sizes.begin(), sizes.end());
715-
std::vector<int64_t> strides_vec(strides.begin(), strides.end());
716-
717-
gpu_inputs[i] = new SlimTensor(slim::from_blob(
718-
const_cast<void*>(data_ptr),
719-
slim::makeArrayRef(sizes_vec),
720-
slim::makeArrayRef(strides_vec),
721-
static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
722-
DEFAULT_CUDA_DEVICE,
723-
0 // storage_offset
724-
));
707+
gpu_inputs[i] = make_slimtensor_from_blob_with_etensor_metadata(
708+
const_cast<void*>(data_ptr), cpu_tensor);
725709

726710
continue;
727711
}

backends/cuda/runtime/utils.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <executorch/backends/aoti/slim/c10/core/Device.h>
1818
#include <executorch/backends/aoti/slim/core/slim_tensor.h>
1919
#include <executorch/backends/aoti/slim/core/storage.h>
20+
#include <executorch/backends/aoti/slim/factory/from_blob.h>
21+
#include <executorch/backends/aoti/slim/util/array_ref_util.h>
2022

2123
namespace executorch::backends::cuda {
2224

@@ -314,4 +316,42 @@ inline void delete_slimtensor_vector(
314316
tensors.clear();
315317
}
316318

319+
/**
320+
* Creates a non-owning SlimTensor that wraps an external data pointer, using
321+
* the shape, strides and dtype of the given ETensor as metadata.
322+
*
323+
* Common helper for the CUDA backend, where we frequently need to create a
324+
* SlimTensor view of memory (e.g., a GPU buffer) that mirrors the layout of a
325+
* CPU/GPU ETensor. The returned tensor does NOT own the data; the caller must
326+
* keep it alive for the SlimTensor's lifetime.
327+
*
328+
* @param data_ptr Pointer to memory the SlimTensor should reference.
329+
* @param etensor ETensor whose sizes/strides/dtype define the SlimTensor's
330+
* metadata.
331+
* @param device Device where data_ptr resides (defaults to the default
332+
* CUDA device).
333+
* @return A heap-allocated SlimTensor; the caller takes ownership.
334+
*/
335+
inline executorch::backends::aoti::slim::SlimTensor*
336+
make_slimtensor_from_blob_with_etensor_metadata(
337+
void* data_ptr,
338+
const executorch::runtime::etensor::Tensor* etensor,
339+
const executorch::backends::aoti::slim::c10::Device& device =
340+
executorch::backends::aoti::slim::DEFAULT_CUDA_DEVICE) {
341+
auto sizes = etensor->sizes();
342+
auto strides = etensor->strides();
343+
std::vector<int64_t> sizes_vec(sizes.begin(), sizes.end());
344+
std::vector<int64_t> strides_vec(strides.begin(), strides.end());
345+
346+
return new executorch::backends::aoti::slim::SlimTensor(
347+
executorch::backends::aoti::slim::from_blob(
348+
data_ptr,
349+
executorch::backends::aoti::slim::makeArrayRef(sizes_vec),
350+
executorch::backends::aoti::slim::makeArrayRef(strides_vec),
351+
static_cast<executorch::backends::aoti::slim::c10::ScalarType>(
352+
etensor->scalar_type()),
353+
device,
354+
/*storage_offset=*/0));
355+
}
356+
317357
} // namespace executorch::backends::cuda

0 commit comments

Comments
 (0)