1212
1313#include < c10/util/safe_numerics.h>
1414
15+ #ifndef USE_ATEN_LIB
16+ #include < executorch/runtime/core/device_allocator.h>
17+ #endif // USE_ATEN_LIB
1518#include < executorch/runtime/core/exec_aten/util/tensor_util.h>
1619
1720namespace executorch {
@@ -25,6 +28,9 @@ namespace {
2528 * ensures that they are managed together and have the same lifetime as the
2629 * Tensor. When the Tensor is destroyed, the Storage structure ensures
2730 * proper cleanup of the associated metadata and data if needed.
31+ *
32+ * For device tensors, the data pointer points to device memory; the deleter
33+ * is responsible for freeing it through the appropriate DeviceAllocator.
2834 */
2935struct Storage final {
3036 executorch::aten::TensorImpl tensor_impl;
@@ -47,6 +53,11 @@ struct Storage final {
4753 strides(std::move(strides)),
4854 deleter(std::move(deleter)) {}
4955
56+ Storage (const Storage&) = delete;
57+ Storage& operator =(const Storage&) = delete ;
58+ Storage (Storage&&) = delete;
59+ Storage& operator =(Storage&&) = delete ;
60+
5061 ~Storage () {
5162 if (deleter) {
5263 deleter (tensor_impl.mutable_data ());
@@ -63,7 +74,8 @@ TensorPtr make_tensor_ptr(
6374 std::vector<executorch::aten::StridesType> strides,
6475 executorch::aten::ScalarType type,
6576 executorch::aten::TensorShapeDynamism dynamism,
66- std::function<void (void *)> deleter) {
77+ std::function<void (void *)> deleter,
78+ executorch::aten::Device device) {
6779 const auto dim = sizes.size ();
6880 ET_CHECK_MSG (
6981 dim_order.empty () || dim_order.size () == dim,
@@ -111,20 +123,22 @@ TensorPtr make_tensor_ptr(
111123 data,
112124 dim_order.data (),
113125 strides.data (),
114- dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC );
126+ dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC ,
127+ device.type (),
128+ device.index ());
115129 auto storage = std::make_shared<Storage>(
116130 std::move (tensor_impl),
117131 std::move (sizes),
118132 std::move (dim_order),
119133 std::move (strides),
120134 std::move (deleter));
121- const auto tensor_ptr = &storage->tensor ;
135+ const auto raw_tensor_ptr = &storage->tensor ;
122136 return std::shared_ptr<executorch::aten::Tensor>(
123- std::move (storage), tensor_ptr );
137+ std::move (storage), raw_tensor_ptr );
124138#else
125139 auto options = c10::TensorOptions ()
126140 .dtype (c10::scalarTypeToTypeMeta (type))
127- .device (c10:: kCPU );
141+ .device (device );
128142 auto storage = c10::Storage (
129143 c10::Storage::use_byte_size_t (),
130144 at::detail::computeStorageNbytes (
@@ -135,7 +149,7 @@ TensorPtr make_tensor_ptr(
135149 false );
136150 auto tensor_impl = c10::make_intrusive<executorch::aten::TensorImpl>(
137151 std::move (storage),
138- c10::DispatchKeySet (c10::DispatchKey:: CPU ),
152+ c10::DispatchKeySet (options. computeDispatchKey () ),
139153 options.dtype ());
140154 tensor_impl->set_sizes_and_strides (sizes, strides);
141155 return std::make_shared<executorch::aten::Tensor>(std::move (tensor_impl));
@@ -271,5 +285,101 @@ runtime::Error resize_tensor_ptr(
271285 sizes.data (), sizes.size ()));
272286}
273287
288+ // ---- Device tensor helpers ----
289+ //
290+ // These helpers rely on the ExecuTorch DeviceAllocator and the portable tensor
291+ // metadata APIs (dim_order, shape_dynamism, device), which have no equivalent
292+ // in USE_ATEN_LIB builds, so they are compiled out there.
293+
294+ #ifndef USE_ATEN_LIB
295+
296+ TensorPtr clone_tensor_ptr_to_device (
297+ const TensorPtr& cpu_tensor,
298+ executorch::aten::Device device) {
299+ ET_CHECK_MSG (
300+ cpu_tensor->device ().is_cpu (),
301+ " Source tensor must reside on CPU; got device type %d." ,
302+ static_cast <int >(cpu_tensor->device_type ()));
303+
304+ ET_CHECK_MSG (
305+ !device.is_cpu (),
306+ " Target device must not be CPU; use clone_tensor_ptr for CPU-to-CPU copies." );
307+
308+ auto * allocator = runtime::get_device_allocator (device.type ());
309+ ET_CHECK_MSG (
310+ allocator != nullptr ,
311+ " No device allocator registered for device type %d" ,
312+ static_cast <int >(device.type ()));
313+
314+ const auto nbytes = cpu_tensor->nbytes ();
315+ const auto * cpu_data = cpu_tensor->const_data_ptr ();
316+ ET_CHECK_MSG (cpu_data != nullptr , " Source tensor has no data." );
317+
318+ auto result = allocator->allocate (nbytes, device.index ());
319+ ET_CHECK_MSG (result.ok (), " Failed to allocate device memory." );
320+ void * device_data = result.get ();
321+
322+ auto err = allocator->copy_host_to_device (
323+ device_data, cpu_data, nbytes, device.index ());
324+ ET_CHECK_MSG (err == runtime::Error::Ok, " Host-to-device copy failed." );
325+
326+ std::vector<executorch::aten::SizesType> sizes (
327+ cpu_tensor->sizes ().begin (), cpu_tensor->sizes ().end ());
328+ std::vector<executorch::aten::DimOrderType> dim_order (
329+ cpu_tensor->dim_order ().begin (), cpu_tensor->dim_order ().end ());
330+ std::vector<executorch::aten::StridesType> strides (
331+ cpu_tensor->strides ().begin (), cpu_tensor->strides ().end ());
332+
333+ return make_tensor_ptr (
334+ std::move (sizes),
335+ device_data,
336+ std::move (dim_order),
337+ std::move (strides),
338+ cpu_tensor->scalar_type (),
339+ cpu_tensor->shape_dynamism (),
340+ [allocator, device](void * ptr) {
341+ allocator->deallocate (ptr, device.index ());
342+ },
343+ device);
344+ }
345+
346+ TensorPtr clone_tensor_ptr_to_cpu (const TensorPtr& device_tensor) {
347+ const auto nbytes = device_tensor->nbytes ();
348+ const auto * device_data = device_tensor->const_data_ptr ();
349+ ET_CHECK_MSG (device_data != nullptr , " Source device tensor has no data." );
350+
351+ const auto device = device_tensor->device ();
352+ ET_CHECK_MSG (!device.is_cpu (), " Source tensor is already on CPU." );
353+
354+ auto * allocator = runtime::get_device_allocator (device.type ());
355+ ET_CHECK_MSG (
356+ allocator != nullptr ,
357+ " No device allocator registered for device type %d" ,
358+ static_cast <int >(device.type ()));
359+
360+ std::vector<uint8_t > cpu_data (nbytes);
361+
362+ auto err = allocator->copy_device_to_host (
363+ cpu_data.data (), device_data, nbytes, device.index ());
364+ ET_CHECK_MSG (err == runtime::Error::Ok, " Device-to-host copy failed." );
365+
366+ std::vector<executorch::aten::SizesType> sizes (
367+ device_tensor->sizes ().begin (), device_tensor->sizes ().end ());
368+ std::vector<executorch::aten::DimOrderType> dim_order (
369+ device_tensor->dim_order ().begin (), device_tensor->dim_order ().end ());
370+ std::vector<executorch::aten::StridesType> strides (
371+ device_tensor->strides ().begin (), device_tensor->strides ().end ());
372+
373+ return make_tensor_ptr (
374+ std::move (sizes),
375+ std::move (cpu_data),
376+ std::move (dim_order),
377+ std::move (strides),
378+ device_tensor->scalar_type (),
379+ device_tensor->shape_dynamism ());
380+ }
381+
382+ #endif // USE_ATEN_LIB
383+
274384} // namespace extension
275385} // namespace executorch
0 commit comments