Skip to content

Commit adff306

Browse files
committed
[ET Device Support] MethodMeta: expose per-buffer device placement API
Add memory_planned_buffer_device(index) to MethodMeta, returning the Device (type + index) for each planned memory buffer. This reads from the non_const_buffer_device field in the serialized ExecutionPlan. For CPU-only programs (or legacy PTE files without non_const_buffer_device), all buffers default to Device{CPU, 0}. The sparse list only stores entries for non-CPU buffers, so the lookup scans for a matching buffer_idx. This API enables Module::load_method() to query each buffer's target device and allocate accordingly (malloc for CPU, DeviceAllocator for CUDA, etc.). Differential Revision: [D97850708](https://our.internmc.facebook.com/intern/diff/D97850708/) ghstack-source-id: 357060897 Pull Request resolved: #18474
1 parent 47a8b76 commit adff306

6 files changed

Lines changed: 114 additions & 3 deletions

File tree

runtime/core/test/targets.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def define_common_targets():
5050
"//executorch/runtime/core:core",
5151
],
5252
)
53-
53+
5454
runtime.cxx_test(
5555
name = "event_tracer_test",
5656
srcs = [

runtime/executor/method_meta.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,42 @@ Result<int64_t> MethodMeta::memory_planned_buffer_size(size_t index) const {
325325
return s_plan_->non_const_buffer_sizes()->Get(index + 1);
326326
}
327327

328+
// Reports which device the memory-planned buffer at `index` (0-based) lives
// on. Fails with InvalidArgument when `index` is not a valid planned-buffer
// index; otherwise returns the annotated device, or Device{CPU, 0} when the
// plan carries no annotation for that buffer.
Result<etensor::Device> MethodMeta::memory_planned_buffer_device(
    size_t index) const {
  const auto buffer_count = this->num_memory_planned_buffers();
  ET_CHECK_OR_RETURN_ERROR(
      index < buffer_count,
      InvalidArgument,
      "index %zu out of range. num_buffers: %zu",
      index,
      buffer_count);

  // non_const_buffer_device is an optional field: it is null for CPU-only
  // programs and for legacy PTE files. Treating a null list as having zero
  // entries lets both cases fall through to the CPU default below.
  const auto* device_entries = s_plan_->non_const_buffer_device();
  const size_t entry_count =
      device_entries != nullptr ? device_entries->size() : 0;

  // Entries are keyed by buffer_idx, which follows the indexing of
  // non_const_buffer_sizes (1-based, slot 0 reserved). Shift the caller's
  // 0-based index by one before comparing.
  const auto wanted_idx = static_cast<int32_t>(index + 1);
  for (size_t pos = 0; pos < entry_count; ++pos) {
    const auto* candidate = device_entries->Get(pos);
    if (candidate->buffer_idx() != wanted_idx) {
      continue;
    }
    return etensor::Device{
        static_cast<etensor::DeviceType>(candidate->device_type()),
        static_cast<etensor::DeviceIndex>(candidate->device_index())};
  }

  // The list is sparse and only records non-CPU buffers, so a buffer with no
  // matching entry is a CPU buffer.
  return etensor::Device{etensor::DeviceType::CPU, 0};
}
363+
328364
bool MethodMeta::uses_backend(const char* backend_name) const {
329365
ET_CHECK_MSG(backend_name, "backend name is null");
330366
const auto delegates = s_plan_->delegates();

runtime/executor/method_meta.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include <executorch/runtime/core/exec_aten/exec_aten.h>
12+
#include <executorch/runtime/core/portable_type/device.h>
1213
#include <executorch/runtime/core/result.h>
1314
#include <executorch/runtime/core/span.h>
1415
#include <executorch/runtime/core/tag.h>
@@ -234,6 +235,19 @@ class MethodMeta final {
234235
*/
235236
Result<int64_t> memory_planned_buffer_size(size_t index) const;
236237

238+
/**
239+
* Get the device placement for the specified memory-planned buffer.
240+
*
241+
* For CPU-only programs (no non_const_buffer_device in the PTE), all buffers
242+
* default to Device{CPU, 0}. For programs with device annotations, returns
243+
* the device type and index that the buffer should be allocated on; buffers
* without a device annotation default to Device{CPU, 0}.
244+
*
245+
* @param[in] index The index of the buffer to look up (0-based, same
246+
* indexing as memory_planned_buffer_size()).
247+
* @returns The Device on success, or Error::InvalidArgument if index is out
*     of range.
248+
*/
249+
Result<etensor::Device> memory_planned_buffer_device(size_t index) const;
250+
237251
/**
238252
* Check to see if a backend is used in this method.
239253
*

runtime/executor/test/method_meta_test.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ class MethodMetaTest : public ::testing::Test {
7474
void SetUp() override {
7575
load_program(std::getenv("ET_MODULE_ADD_PATH"), "add");
7676
load_program(std::getenv("ET_MODULE_STATEFUL_PATH"), "stateful");
77+
const char* device_path = std::getenv("ET_MODULE_ADD_WITH_DEVICE_PATH");
78+
if (device_path != nullptr) {
79+
load_program(device_path, "add_with_device");
80+
}
7781
}
7882

7983
private:
@@ -192,6 +196,27 @@ TEST_F(MethodMetaTest, MethodMetaAttribute) {
192196
ASSERT_EQ(bad_access.error(), Error::InvalidArgument);
193197
}
194198

199+
// Every planned buffer of the "add" program (exported without device
// annotations) must report Device{CPU, 0}, and an index one past the last
// buffer must be rejected.
TEST_F(MethodMetaTest, MemoryPlannedBufferDeviceDefaultsCpu) {
  using executorch::runtime::etensor::DeviceType;

  Result<MethodMeta> meta = programs_["add"]->method_meta("forward");
  ASSERT_EQ(meta.error(), Error::Ok);

  // The loop below would pass vacuously with zero buffers, so require at
  // least one.
  const size_t buffer_count = meta->num_memory_planned_buffers();
  ASSERT_GT(buffer_count, 0);

  for (size_t buffer_id = 0; buffer_id < buffer_count; ++buffer_id) {
    auto placement = meta->memory_planned_buffer_device(buffer_id);
    ASSERT_TRUE(placement.ok());
    EXPECT_EQ(placement->type(), DeviceType::CPU);
    EXPECT_EQ(placement->index(), 0);
  }

  // buffer_count itself is the first invalid index.
  auto past_the_end = meta->memory_planned_buffer_device(buffer_count);
  EXPECT_EQ(past_the_end.error(), Error::InvalidArgument);
}
219+
195220
TEST_F(MethodMetaTest, TensorInfoSizeOverflow) {
196221
// Create sizes that will cause overflow when multiplied
197222
std::vector<int32_t> overflow_sizes = {
@@ -214,3 +239,29 @@ TEST_F(MethodMetaTest, TensorInfoSizeOverflow) {
214239
executorch::aten::string_view{nullptr, 0}),
215240
"");
216241
}
242+
243+
// The device-annotated "add" program must expose its single planned buffer
// as a CUDA buffer and reject any index past it.
TEST_F(MethodMetaTest, MethodMetaBufferDeviceReturnsCudaForDeviceBuffer) {
  using executorch::runtime::etensor::DeviceType;

  // SetUp() only loads this program when the env var is present.
  const auto program_it = programs_.find("add_with_device");
  ASSERT_NE(program_it, programs_.end())
      << "ET_MODULE_ADD_WITH_DEVICE_PATH env var not set";

  Result<MethodMeta> meta = program_it->second->method_meta("forward");
  ASSERT_EQ(meta.error(), Error::Ok);

  // ModuleAddWithDevice is exported with enable_non_cpu_memory_planning=True
  // and delegates add(a,b) to CUDA. The serialized plan is expected to hold:
  //   non_const_buffer_sizes:  [0, 48]  (index 0 reserved)
  //   non_const_buffer_device: [{buffer_idx=1, device_type=CUDA,
  //                              device_index=0}]
  // That gives exactly one planned buffer, user-facing index 0, on CUDA.
  ASSERT_EQ(meta->num_memory_planned_buffers(), 1);

  auto placement = meta->memory_planned_buffer_device(0);
  ASSERT_TRUE(placement.ok());
  EXPECT_EQ(placement->type(), DeviceType::CUDA);
  EXPECT_EQ(placement->index(), 0);

  // Index 1 is past the only buffer and must be rejected.
  auto past_the_end = meta->memory_planned_buffer_device(1);
  EXPECT_EQ(past_the_end.error(), Error::InvalidArgument);
}

runtime/executor/test/targets.bzl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,12 @@ def define_common_targets(is_fbcode = False):
178178
"//executorch/runtime/executor:program",
179179
"//executorch/extension/data_loader:file_data_loader",
180180
],
181-
env = modules_env,
181+
env = dict(
182+
modules_env,
183+
**{
184+
"ET_MODULE_ADD_WITH_DEVICE_PATH": "$(location fbcode//executorch/test/models:exported_program_with_device_info[ModuleAddWithDevice.pte])",
185+
}
186+
),
182187
)
183188

184189
runtime.cxx_test(

test/models/export_program_with_device_info.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,12 @@ def main() -> None:
9999
compile_config=EdgeCompileConfig(_check_ir_validity=False),
100100
)
101101
lowered = edge.to_backend(_DeviceAwarePartitioner())
102-
et_prog = lowered.to_executorch(ExecutorchBackendConfig(emit_stacktrace=False))
102+
et_prog = lowered.to_executorch(
103+
ExecutorchBackendConfig(
104+
emit_stacktrace=False,
105+
enable_non_cpu_memory_planning=True,
106+
)
107+
)
103108

104109
os.makedirs(args.outdir, exist_ok=True)
105110
outfile = os.path.join(args.outdir, "ModuleAddWithDevice.pte")

0 commit comments

Comments
 (0)