Skip to content

Commit 01b3568

Browse files
authored
Add device tensor helper functions to TensorPtr API (pytorch#20005)
Differential Revision: D99913077 Pull Request resolved: pytorch#20005
1 parent 8928480 commit 01b3568

7 files changed

Lines changed: 700 additions & 31 deletions

File tree

extension/tensor/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ def define_common_targets():
2424
],
2525
visibility = ["PUBLIC"],
2626
deps = [
27+
"//executorch/runtime/core:device_allocator",
2728
"//executorch/runtime/core/exec_aten/util:dim_order_util" + aten_suffix,
2829
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
2930
],

extension/tensor/tensor_ptr.cpp

Lines changed: 116 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@
1212

1313
#include <c10/util/safe_numerics.h>
1414

15+
#ifndef USE_ATEN_LIB
16+
#include <executorch/runtime/core/device_allocator.h>
17+
#endif // USE_ATEN_LIB
1518
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
1619

1720
namespace executorch {
@@ -25,6 +28,9 @@ namespace {
2528
* ensures that they are managed together and have the same lifetime as the
2629
* Tensor. When the Tensor is destroyed, the Storage structure ensures
2730
* proper cleanup of the associated metadata and data if needed.
31+
*
32+
* For device tensors, the data pointer points to device memory; the deleter
33+
* is responsible for freeing it through the appropriate DeviceAllocator.
2834
*/
2935
struct Storage final {
3036
executorch::aten::TensorImpl tensor_impl;
@@ -47,6 +53,11 @@ struct Storage final {
4753
strides(std::move(strides)),
4854
deleter(std::move(deleter)) {}
4955

56+
Storage(const Storage&) = delete;
57+
Storage& operator=(const Storage&) = delete;
58+
Storage(Storage&&) = delete;
59+
Storage& operator=(Storage&&) = delete;
60+
5061
~Storage() {
5162
if (deleter) {
5263
deleter(tensor_impl.mutable_data());
@@ -63,7 +74,8 @@ TensorPtr make_tensor_ptr(
6374
std::vector<executorch::aten::StridesType> strides,
6475
executorch::aten::ScalarType type,
6576
executorch::aten::TensorShapeDynamism dynamism,
66-
std::function<void(void*)> deleter) {
77+
std::function<void(void*)> deleter,
78+
executorch::aten::Device device) {
6779
const auto dim = sizes.size();
6880
ET_CHECK_MSG(
6981
dim_order.empty() || dim_order.size() == dim,
@@ -111,20 +123,22 @@ TensorPtr make_tensor_ptr(
111123
data,
112124
dim_order.data(),
113125
strides.data(),
114-
dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC);
126+
dim > 0 ? dynamism : executorch::aten::TensorShapeDynamism::STATIC,
127+
device.type(),
128+
device.index());
115129
auto storage = std::make_shared<Storage>(
116130
std::move(tensor_impl),
117131
std::move(sizes),
118132
std::move(dim_order),
119133
std::move(strides),
120134
std::move(deleter));
121-
const auto tensor_ptr = &storage->tensor;
135+
const auto raw_tensor_ptr = &storage->tensor;
122136
return std::shared_ptr<executorch::aten::Tensor>(
123-
std::move(storage), tensor_ptr);
137+
std::move(storage), raw_tensor_ptr);
124138
#else
125139
auto options = c10::TensorOptions()
126140
.dtype(c10::scalarTypeToTypeMeta(type))
127-
.device(c10::kCPU);
141+
.device(device);
128142
auto storage = c10::Storage(
129143
c10::Storage::use_byte_size_t(),
130144
at::detail::computeStorageNbytes(
@@ -135,7 +149,7 @@ TensorPtr make_tensor_ptr(
135149
false);
136150
auto tensor_impl = c10::make_intrusive<executorch::aten::TensorImpl>(
137151
std::move(storage),
138-
c10::DispatchKeySet(c10::DispatchKey::CPU),
152+
c10::DispatchKeySet(options.computeDispatchKey()),
139153
options.dtype());
140154
tensor_impl->set_sizes_and_strides(sizes, strides);
141155
return std::make_shared<executorch::aten::Tensor>(std::move(tensor_impl));
@@ -271,5 +285,101 @@ runtime::Error resize_tensor_ptr(
271285
sizes.data(), sizes.size()));
272286
}
273287

288+
// ---- Device tensor helpers ----
289+
//
290+
// These helpers rely on the ExecuTorch DeviceAllocator and the portable tensor
291+
// metadata APIs (dim_order, shape_dynamism, device), which have no equivalent
292+
// in USE_ATEN_LIB builds, so they are compiled out there.
293+
294+
#ifndef USE_ATEN_LIB
295+
296+
/**
 * Builds a tensor on `device` that holds a copy of a CPU tensor's bytes.
 *
 * The destination buffer comes from the DeviceAllocator registered for
 * `device.type()`, is filled with copy_host_to_device(), and is handed back to
 * that same allocator by the deleter attached to the returned tensor. Sizes,
 * dim order, strides, scalar type and shape dynamism are taken from the
 * source tensor.
 *
 * Each precondition below is enforced with ET_CHECK_MSG: the source must be on
 * CPU, the target must not be CPU, an allocator must be registered for the
 * target device type, the source must have a non-null data pointer, and both
 * the allocation and the copy must succeed.
 *
 * @param cpu_tensor Source tensor; its device must report is_cpu().
 * @param device Destination device; must not report is_cpu().
 * @return TensorPtr produced by make_tensor_ptr() around the device buffer.
 */
TensorPtr clone_tensor_ptr_to_device(
    const TensorPtr& cpu_tensor,
    executorch::aten::Device device) {
  // --- Validate source and destination placement. ---
  ET_CHECK_MSG(
      cpu_tensor->device().is_cpu(),
      "Source tensor must reside on CPU; got device type %d.",
      static_cast<int>(cpu_tensor->device_type()));
  ET_CHECK_MSG(
      !device.is_cpu(),
      "Target device must not be CPU; use clone_tensor_ptr for CPU-to-CPU copies.");

  // --- Look up the allocator that serves the destination device type. ---
  auto* allocator = runtime::get_device_allocator(device.type());
  ET_CHECK_MSG(
      allocator != nullptr,
      "No device allocator registered for device type %d",
      static_cast<int>(device.type()));

  // --- Locate the host bytes to upload. ---
  const auto byte_count = cpu_tensor->nbytes();
  const auto* cpu_data = cpu_tensor->const_data_ptr();
  ET_CHECK_MSG(cpu_data != nullptr, "Source tensor has no data.");

  // --- Allocate on the device and upload. ---
  const auto device_index = device.index();
  auto result = allocator->allocate(byte_count, device_index);
  ET_CHECK_MSG(result.ok(), "Failed to allocate device memory.");
  void* const device_buffer = result.get();

  auto err = allocator->copy_host_to_device(
      device_buffer, cpu_data, byte_count, device_index);
  ET_CHECK_MSG(err == runtime::Error::Ok, "Host-to-device copy failed.");

  // --- Snapshot the source metadata into vectors the new tensor will own. ---
  const auto src_sizes = cpu_tensor->sizes();
  const auto src_dim_order = cpu_tensor->dim_order();
  const auto src_strides = cpu_tensor->strides();
  std::vector<executorch::aten::SizesType> sizes(
      src_sizes.begin(), src_sizes.end());
  std::vector<executorch::aten::DimOrderType> dim_order(
      src_dim_order.begin(), src_dim_order.end());
  std::vector<executorch::aten::StridesType> strides(
      src_strides.begin(), src_strides.end());

  // The buffer must go back to the allocator it came from, on the same
  // device index, once the tensor's deleter is invoked.
  auto release_device_buffer = [allocator, device_index](void* ptr) {
    allocator->deallocate(ptr, device_index);
  };

  return make_tensor_ptr(
      std::move(sizes),
      device_buffer,
      std::move(dim_order),
      std::move(strides),
      cpu_tensor->scalar_type(),
      cpu_tensor->shape_dynamism(),
      std::move(release_device_buffer),
      device);
}
345+
346+
/**
 * Builds a tensor backed by a host byte vector that holds a copy of a device
 * tensor's bytes.
 *
 * The bytes are fetched with copy_device_to_host() on the DeviceAllocator
 * registered for the source tensor's device type. Sizes, dim order, strides,
 * scalar type and shape dynamism are taken from the source tensor. The byte
 * vector is moved into make_tensor_ptr(), which is called without a device
 * argument.
 *
 * Each precondition below is enforced with ET_CHECK_MSG: the source must have
 * a non-null data pointer, must not already be on CPU, an allocator must be
 * registered for its device type, and the copy must succeed.
 *
 * @param device_tensor Source tensor; its device must not report is_cpu().
 * @return TensorPtr produced by make_tensor_ptr() around the copied bytes.
 */
TensorPtr clone_tensor_ptr_to_cpu(const TensorPtr& device_tensor) {
  // --- Locate the device bytes to download. ---
  const auto byte_count = device_tensor->nbytes();
  const auto* device_data = device_tensor->const_data_ptr();
  ET_CHECK_MSG(device_data != nullptr, "Source device tensor has no data.");

  // --- The source must actually live off-CPU. ---
  const auto device = device_tensor->device();
  ET_CHECK_MSG(!device.is_cpu(), "Source tensor is already on CPU.");

  // --- Look up the allocator that serves the source device type. ---
  auto* allocator = runtime::get_device_allocator(device.type());
  ET_CHECK_MSG(
      allocator != nullptr,
      "No device allocator registered for device type %d",
      static_cast<int>(device.type()));

  // --- Download into a host-side byte vector of matching length. ---
  std::vector<uint8_t> host_bytes(byte_count);
  auto err = allocator->copy_device_to_host(
      host_bytes.data(), device_data, byte_count, device.index());
  ET_CHECK_MSG(err == runtime::Error::Ok, "Device-to-host copy failed.");

  // --- Snapshot the source metadata into vectors the new tensor will own. ---
  const auto src_sizes = device_tensor->sizes();
  const auto src_dim_order = device_tensor->dim_order();
  const auto src_strides = device_tensor->strides();
  std::vector<executorch::aten::SizesType> sizes(
      src_sizes.begin(), src_sizes.end());
  std::vector<executorch::aten::DimOrderType> dim_order(
      src_dim_order.begin(), src_dim_order.end());
  std::vector<executorch::aten::StridesType> strides(
      src_strides.begin(), src_strides.end());

  return make_tensor_ptr(
      std::move(sizes),
      std::move(host_bytes),
      std::move(dim_order),
      std::move(strides),
      device_tensor->scalar_type(),
      device_tensor->shape_dynamism());
}
381+
382+
#endif // USE_ATEN_LIB
383+
274384
} // namespace extension
275385
} // namespace executorch

0 commit comments

Comments
 (0)