Skip to content

Commit fce7663

Browse files
committed
[ET Device Support] Schema changes: device info on Tensor and buffer-level device array
This diff adds device placement information to the ExecuTorch schema to support representing tensor-level device type information, which will be the basic requirement for the following tensor_parser updates. This is part of the Phase 1 implementation to make ET device type work E2E without user-specified device placement. Design doc: https://docs.google.com/document/d/1lwd9BlohmwkN5EEvRulO_b-XnZBwv1nMb5l2K3jfuwA/edit?tab=t.0#heading=h.o6anuvkix4bu Differential Revision: [D93635657](https://our.internmc.facebook.com/intern/diff/D93635657/) ghstack-source-id: 342367954 Pull Request resolved: #17533
1 parent fc3239c commit fce7663

2 files changed

Lines changed: 96 additions & 0 deletions

File tree

exir/schema.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ class TensorDataLocation(IntEnum):
4848
EXTERNAL = 1
4949

5050

51+
class DeviceType(IntEnum):
52+
"""
53+
Device type enum indicating where a tensor resides or should be allocated.
54+
Note that this enum is not directly mapped to the DeviceType enum in pytorch/pytorch.
55+
Check program.fbs for explanations of this enum.
56+
"""
57+
58+
CPU = 0
59+
CUDA = 1
60+
61+
5162
@dataclass
5263
class ExtraTensorInfo:
5364
"""
@@ -57,6 +68,12 @@ class ExtraTensorInfo:
5768
mutable_data_segments_idx: int = 0
5869
fully_qualified_name: Optional[str] = None
5970
location: TensorDataLocation = TensorDataLocation.SEGMENT
71+
# Device type where this tensor resides or should be allocated.
72+
# Defaults to CPU for backward compatibility.
73+
device_type: DeviceType = DeviceType.CPU
74+
# Device index for multi-device scenarios (e.g., cuda:0, cuda:1).
75+
# A value of -1 indicates the default device.
76+
device_index: int = -1
6077

6178

6279
@dataclass
@@ -261,6 +278,26 @@ class Operator:
261278
overload: str
262279

263280

281+
@dataclass
282+
class NonConstBufferDevice:
283+
"""
284+
Device placement information for a non-constant memory buffer.
285+
This is a sparse representation: only buffers that are NOT on CPU need entries.
286+
Buffers not listed in ExecutionPlan.non_const_buffer_device default to CPU.
287+
Check program.fbs for explanations.
288+
"""
289+
290+
# Index into ExecutionPlan.non_const_buffer_sizes identifying which buffer
291+
# this entry applies to.
292+
buffer_index: int
293+
# The device type where this buffer should be allocated.
294+
# Defaults to CPU for backward compatibility.
295+
device_type: DeviceType = DeviceType.CPU
296+
# The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
297+
# A value of -1 indicates the default device.
298+
device_index: int = -1
299+
300+
264301
@dataclass
265302
class ExecutionPlan:
266303
name: str
@@ -276,6 +313,12 @@ class ExecutionPlan:
276313
# Runtime should use the len(constant_buffer) as the ground truth of
277314
# constant memory buffer size, and ignore non_const_buffer_sizes[0].
278315
non_const_buffer_sizes: List[int]
316+
# [Optional] Sparse device placement information for non-constant buffers.
317+
# Only buffers that are NOT on CPU need to be listed here. Each entry
318+
# specifies a buffer_index (into non_const_buffer_sizes) and its device.
319+
# Buffers not listed here default to CPU, saving binary size when most
320+
# buffers are on CPU.
321+
non_const_buffer_device: Optional[List[NonConstBufferDevice]] = None
279322

280323

281324
@dataclass

schema/program.fbs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,25 @@ enum TensorDataLocation : byte {
6161
EXTERNAL = 1,
6262
}
6363

64+
// Device type enum indicating where a tensor resides or should be allocated.
65+
// Follows PyTorch DeviceType convention for compatibility.
66+
enum DeviceType : byte {
67+
CPU = 0,
68+
CUDA = 1,
69+
// Reserve slots for future device types following PyTorch convention:
70+
// MKLDNN = 2,
71+
// OPENGL = 3,
72+
// OPENCL = 4,
73+
// IDEEP = 5,
74+
// HIP = 6,
75+
// FPGA = 7,
76+
// MAIA = 8,
77+
// XLA = 9,
78+
// Vulkan = 10,
79+
// Metal = 11,
80+
// XPU = 12, MPS = 13, Meta = 14, HPU = 15, VE = 16, Lazy = 17, IPU = 18, MTIA = 19, PrivateUse1 = 20,
81+
}
82+
6483
// Table to put additional information about tensors in that is not applicable
6584
// to the vast majority of tensors in the vast majority of programs.
6685
table ExtraTensorInfo {
@@ -79,6 +98,15 @@ table ExtraTensorInfo {
7998
// must be non-empty, and is used as a key to find the tensor's external
8099
// data. Tensor.data_buffer_idx is ignored.
81100
location: TensorDataLocation;
101+
102+
// [Optional] The device type where this tensor resides or should be allocated.
103+
// Defaults to CPU for backward compatibility with existing PTE files.
104+
device_type: DeviceType = CPU;
105+
106+
// [Optional] The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
107+
// A value of -1 indicates the default device. Defaults to -1 for backward
108+
// compatibility.
109+
device_index: byte = -1;
82110
}
83111

84112
table Tensor {
@@ -386,6 +414,13 @@ table ExecutionPlan {
386414
// constants memory buffer size, and ignore non_const_buffer_sizes[0].
387415
non_const_buffer_sizes: [int64];
388416

417+
// [Optional] Sparse device placement information for non-constant buffers.
418+
// Only buffers that are NOT on CPU need to be listed here. Each entry
419+
// specifies a buffer_index (into non_const_buffer_sizes) and its device.
420+
// Buffers not listed here default to CPU, saving binary size when most
421+
// buffers are on CPU.
422+
non_const_buffer_device: [NonConstBufferDevice];
423+
389424
}
390425

391426
// Constant tensor data stored directly in the flatbuffer.
@@ -406,6 +441,24 @@ table BackendDelegateInlineData {
406441
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
407442
}
408443

444+
// Device placement information for a non-constant memory buffer.
445+
// This is a sparse representation: only buffers that are NOT on CPU need entries.
446+
// Buffers not listed in ExecutionPlan.non_const_buffer_device default to CPU.
447+
table NonConstBufferDevice {
448+
// Index into ExecutionPlan.non_const_buffer_sizes identifying which buffer
449+
// this entry applies to.
450+
buffer_index: uint32;
451+
452+
// The device type where this buffer should be allocated.
453+
// Defaults to CPU for backward compatibility with existing PTE files.
454+
device_type: DeviceType = CPU;
455+
456+
// The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
457+
// A value of -1 indicates the default device. Defaults to -1 for backward
458+
// compatibility.
459+
device_index: byte = -1;
460+
}
461+
409462
// Describes a contiguous piece of data that lives outside of the flatbuffer data,
410463
// typically appended afterwards in the file. The "extended header" in the file,
411464
// when present, points to the segment base offset.

0 commit comments

Comments
 (0)