Skip to content

Commit 91f2c3a

Browse files
committed
[ET Device Support] TensorImpl carries device info
This diff extends `TensorImpl` to carry device information, enabling the runtime tensor to track which device its data resides on (CPU, CUDA, etc.). This is a prerequisite for parsing device info from the schema and allocating device memory. Differential Revision: [D93635655](https://our.internmc.facebook.com/intern/diff/D93635655/) ghstack-source-id: 342367953 Pull Request resolved: #17534
1 parent fce7663 commit 91f2c3a

3 files changed

Lines changed: 141 additions & 3 deletions

File tree

runtime/core/portable_type/tensor_impl.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ TensorImpl::TensorImpl(
5050
void* data,
5151
DimOrderType* dim_order,
5252
StridesType* strides,
53-
TensorShapeDynamism dynamism)
53+
TensorShapeDynamism dynamism,
54+
DeviceType device_type,
55+
DeviceIndex device_index)
5456
: sizes_(sizes),
5557
dim_order_(dim_order),
5658
strides_(strides),
@@ -59,7 +61,8 @@ TensorImpl::TensorImpl(
5961
numel_(compute_numel(sizes, dim)),
6062
numel_bound_(numel_),
6163
type_(type),
62-
shape_dynamism_(dynamism) {
64+
shape_dynamism_(dynamism),
65+
device_(device_type, device_index) {
6366
ET_CHECK_MSG(
6467
isValid(type_), "Invalid type %" PRId8, static_cast<int8_t>(type_));
6568
ET_CHECK_MSG(dim_ >= 0, "Dimension must be non-negative, got %zd", dim_);

runtime/core/portable_type/tensor_impl.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <executorch/runtime/core/array_ref.h>
1212
#include <executorch/runtime/core/error.h>
13+
#include <executorch/runtime/core/portable_type/device.h>
1314
#include <executorch/runtime/core/portable_type/scalar_type.h>
1415
#include <executorch/runtime/core/tensor_shape_dynamism.h>
1516

@@ -99,6 +100,8 @@ class TensorImpl {
99100
* @param strides Strides of the tensor at each dimension. Must contain `dim`
100101
* entries.
101102
* @param dynamism The mutability of the shape of the tensor.
103+
* @param device_type The type of device where tensor data resides.
104+
* @param device_index The device index for multi-device scenarios.
102105
*/
103106
TensorImpl(
104107
ScalarType type,
@@ -107,7 +110,9 @@ class TensorImpl {
107110
void* data = nullptr,
108111
DimOrderType* dim_order = nullptr,
109112
StridesType* strides = nullptr,
110-
TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC);
113+
TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC,
114+
DeviceType device_type = DeviceType::CPU,
115+
DeviceIndex device_index = -1);
111116

112117
/**
113118
* Returns the size of the tensor in bytes.
@@ -176,6 +181,21 @@ class TensorImpl {
176181
return shape_dynamism_;
177182
}
178183

184+
/// Returns the device where tensor data resides.
185+
Device device() const {
186+
return device_;
187+
}
188+
189+
/// Returns the type of device where tensor data resides.
190+
DeviceType device_type() const {
191+
return device_.type();
192+
}
193+
194+
/// Returns the device index, or -1 if default/unspecified.
195+
DeviceIndex device_index() const {
196+
return device_.index();
197+
}
198+
179199
/// Returns a pointer of type T to the constant underlying data blob.
180200
template <typename T>
181201
inline const T* data() const {
@@ -261,6 +281,9 @@ class TensorImpl {
261281

262282
/// Specifies the mutability of the shape of the tensor.
263283
const TensorShapeDynamism shape_dynamism_;
284+
285+
/// Device where tensor data resides (CPU, CUDA, etc.)
286+
Device device_;
264287
};
265288

266289
/**

runtime/core/portable_type/test/tensor_impl_test.cpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ using namespace ::testing;
2121
using executorch::runtime::ArrayRef;
2222
using executorch::runtime::Error;
2323
using executorch::runtime::TensorShapeDynamism;
24+
using executorch::runtime::etensor::Device;
25+
using executorch::runtime::etensor::DeviceIndex;
26+
using executorch::runtime::etensor::DeviceType;
2427
using executorch::runtime::etensor::ScalarType;
2528
using executorch::runtime::etensor::TensorImpl;
2629
using SizesType = TensorImpl::SizesType;
@@ -449,3 +452,112 @@ TEST_F(TensorImplTest, TestResizingTensorToZeroAndBack) {
449452
EXPECT_GT(t.numel(), 0);
450453
EXPECT_EQ(t.data(), data);
451454
}
455+
456+
// ============== Device Tests ==============
457+
458+
TEST_F(TensorImplTest, TestDefaultDeviceIsCPU) {
459+
// TensorImpl constructed without device parameters should default to CPU
460+
SizesType sizes[2] = {3, 2};
461+
float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
462+
TensorImpl t(ScalarType::Float, 2, sizes, data);
463+
464+
EXPECT_EQ(t.device_type(), DeviceType::CPU);
465+
EXPECT_EQ(t.device_index(), -1);
466+
EXPECT_EQ(t.device(), Device(DeviceType::CPU, -1));
467+
}
468+
469+
TEST_F(TensorImplTest, TestExplicitCPUDevice) {
470+
// TensorImpl constructed with explicit CPU device
471+
SizesType sizes[2] = {3, 2};
472+
DimOrderType dim_order[2] = {0, 1};
473+
StridesType strides[2] = {2, 1};
474+
float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
475+
TensorImpl t(
476+
ScalarType::Float,
477+
2,
478+
sizes,
479+
data,
480+
dim_order,
481+
strides,
482+
TensorShapeDynamism::STATIC,
483+
DeviceType::CPU,
484+
0);
485+
486+
EXPECT_EQ(t.device_type(), DeviceType::CPU);
487+
EXPECT_EQ(t.device_index(), 0);
488+
EXPECT_EQ(t.device(), Device(DeviceType::CPU, 0));
489+
}
490+
491+
TEST_F(TensorImplTest, TestCUDADevice) {
492+
// TensorImpl constructed with CUDA device
493+
SizesType sizes[2] = {3, 2};
494+
DimOrderType dim_order[2] = {0, 1};
495+
StridesType strides[2] = {2, 1};
496+
float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
497+
TensorImpl t(
498+
ScalarType::Float,
499+
2,
500+
sizes,
501+
data,
502+
dim_order,
503+
strides,
504+
TensorShapeDynamism::STATIC,
505+
DeviceType::CUDA,
506+
0);
507+
508+
EXPECT_EQ(t.device_type(), DeviceType::CUDA);
509+
EXPECT_EQ(t.device_index(), 0);
510+
EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 0));
511+
}
512+
513+
TEST_F(TensorImplTest, TestCUDADeviceMultiGPU) {
514+
// TensorImpl with CUDA device index 1 (second GPU)
515+
SizesType sizes[2] = {3, 2};
516+
DimOrderType dim_order[2] = {0, 1};
517+
StridesType strides[2] = {2, 1};
518+
float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
519+
TensorImpl t(
520+
ScalarType::Float,
521+
2,
522+
sizes,
523+
data,
524+
dim_order,
525+
strides,
526+
TensorShapeDynamism::STATIC,
527+
DeviceType::CUDA,
528+
1);
529+
530+
EXPECT_EQ(t.device_type(), DeviceType::CUDA);
531+
EXPECT_EQ(t.device_index(), 1);
532+
EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 1));
533+
}
534+
535+
TEST_F(TensorImplTest, TestDeviceWithDynamicTensor) {
536+
// Device info should work correctly with dynamic tensors
537+
SizesType sizes[2] = {3, 2};
538+
DimOrderType dim_order[2] = {0, 1};
539+
StridesType strides[2] = {2, 1};
540+
float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
541+
TensorImpl t(
542+
ScalarType::Float,
543+
2,
544+
sizes,
545+
data,
546+
dim_order,
547+
strides,
548+
TensorShapeDynamism::DYNAMIC_BOUND,
549+
DeviceType::CUDA,
550+
0);
551+
552+
EXPECT_EQ(t.device_type(), DeviceType::CUDA);
553+
EXPECT_EQ(t.device_index(), 0);
554+
555+
// Resize should not affect device
556+
SizesType new_sizes[2] = {2, 2};
557+
Error err = resize_tensor_impl(&t, {new_sizes, 2});
558+
EXPECT_EQ(err, Error::Ok);
559+
560+
// Device should remain unchanged after resize
561+
EXPECT_EQ(t.device_type(), DeviceType::CUDA);
562+
EXPECT_EQ(t.device_index(), 0);
563+
}

0 commit comments

Comments (0)