Skip to content

Commit 8fc7355

Browse files
committed
Fix size/stride vectors going out of scope by extracting a shared SlimTensor-from-blob helper
1 parent 1270870 commit 8fc7355

2 files changed

Lines changed: 44 additions & 20 deletions

File tree

backends/cuda/runtime/cuda_backend.cpp

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -691,13 +691,8 @@ class ET_EXPERIMENTAL CudaBackend final
691691
handle->cuda_graph_state.static_input_ptrs.push_back(static_ptr);
692692
handle->cuda_graph_state.static_input_nbytes.push_back(nbytes);
693693

694-
gpu_inputs[i] = new SlimTensor(slim::from_blob(
695-
static_ptr,
696-
slim::makeArrayRef(sizes_vec),
697-
slim::makeArrayRef(strides_vec),
698-
static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
699-
DEFAULT_CUDA_DEVICE,
700-
0));
694+
gpu_inputs[i] = make_slimtensor_from_blob_with_etensor_metadata(
695+
static_ptr, cpu_tensor);
701696
continue;
702697
}
703698

@@ -709,19 +704,8 @@ class ET_EXPERIMENTAL CudaBackend final
709704
cudaError_t err = cudaPointerGetAttributes(&attributes, data_ptr);
710705
if (err == cudaSuccess && attributes.type == cudaMemoryTypeDevice) {
711706
// Data is already on GPU - wrap it directly without copy
712-
auto sizes = cpu_tensor->sizes();
713-
auto strides = cpu_tensor->strides();
714-
std::vector<int64_t> sizes_vec(sizes.begin(), sizes.end());
715-
std::vector<int64_t> strides_vec(strides.begin(), strides.end());
716-
717-
gpu_inputs[i] = new SlimTensor(slim::from_blob(
718-
const_cast<void*>(data_ptr),
719-
slim::makeArrayRef(sizes_vec),
720-
slim::makeArrayRef(strides_vec),
721-
static_cast<slim::c10::ScalarType>(cpu_tensor->scalar_type()),
722-
DEFAULT_CUDA_DEVICE,
723-
0 // storage_offset
724-
));
707+
gpu_inputs[i] = make_slimtensor_from_blob_with_etensor_metadata(
708+
const_cast<void*>(data_ptr), cpu_tensor);
725709

726710
continue;
727711
}

backends/cuda/runtime/utils.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <executorch/backends/aoti/slim/c10/core/Device.h>
1818
#include <executorch/backends/aoti/slim/core/slim_tensor.h>
1919
#include <executorch/backends/aoti/slim/core/storage.h>
20+
#include <executorch/backends/aoti/slim/factory/from_blob.h>
21+
#include <executorch/backends/aoti/slim/util/array_ref_util.h>
2022

2123
namespace executorch::backends::cuda {
2224

@@ -314,4 +316,42 @@ inline void delete_slimtensor_vector(
314316
tensors.clear();
315317
}
316318

319+
/**
320+
* Creates a non-owning SlimTensor that wraps an external data pointer, using
321+
* the shape, strides and dtype of the given ETensor as metadata.
322+
*
323+
* Common helper for the CUDA backend, where we frequently need to create a
324+
* SlimTensor view of memory (e.g., a GPU buffer) that mirrors the layout of a
325+
* CPU/GPU ETensor. The returned tensor does NOT own the data; the caller must
326+
* keep it alive for the SlimTensor's lifetime.
327+
*
328+
* @param data_ptr Pointer to memory the SlimTensor should reference.
329+
* @param etensor ETensor whose sizes/strides/dtype define the SlimTensor's
330+
* metadata.
331+
* @param device Device where data_ptr resides (defaults to the default
332+
* CUDA device).
333+
* @return A heap-allocated SlimTensor; the caller takes ownership.
334+
*/
335+
inline executorch::backends::aoti::slim::SlimTensor*
336+
make_slimtensor_from_blob_with_etensor_metadata(
337+
void* data_ptr,
338+
const executorch::runtime::etensor::Tensor* etensor,
339+
const executorch::backends::aoti::slim::c10::Device& device =
340+
executorch::backends::aoti::slim::DEFAULT_CUDA_DEVICE) {
341+
auto sizes = etensor->sizes();
342+
auto strides = etensor->strides();
343+
std::vector<int64_t> sizes_vec(sizes.begin(), sizes.end());
344+
std::vector<int64_t> strides_vec(strides.begin(), strides.end());
345+
346+
return new executorch::backends::aoti::slim::SlimTensor(
347+
executorch::backends::aoti::slim::from_blob(
348+
data_ptr,
349+
executorch::backends::aoti::slim::makeArrayRef(sizes_vec),
350+
executorch::backends::aoti::slim::makeArrayRef(strides_vec),
351+
static_cast<executorch::backends::aoti::slim::c10::ScalarType>(
352+
etensor->scalar_type()),
353+
device,
354+
/*storage_offset=*/0));
355+
}
356+
317357
} // namespace executorch::backends::cuda

0 commit comments

Comments
 (0)