Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions onnxruntime/python/onnxruntime_inference_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,7 @@ def ortvalue_from_shape_and_type(
device_type: str = "cpu",
device_id: int = 0,
vendor_id: int | OrtDeviceVendorId = -1,
memory_info: C.OrtMemoryInfo | None = None,
) -> OrtValue:
"""
Factory method to construct an OrtValue (which holds a Tensor) from given shape and element_type
Expand All @@ -1092,8 +1093,31 @@ def ortvalue_from_shape_and_type(
:param device_type: e.g. cpu, cuda, cann, cpu by default
:param device_id: device id, e.g. 0
:param vendor_id: The device's PCI vendor id as an int or OrtDeviceVendorId. If provided, the device type should be "gpu" or "npu".
:param memory_info: An OrtMemoryInfo from an OrtEpDevice (e.g. via ep_device.memory_info(OrtDeviceMemoryType.HOST_ACCESSIBLE)). When provided, the allocator matching this memory info is used directly, which allows allocating HOST_ACCESSIBLE memory for zero-copy numpy interop. The device_type, device_id, and vendor_id parameters are ignored when memory_info is provided.
"""

if memory_info is not None:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if memory_info is not None:

When memory_info is not None, the other device parameters are silently ignored. The docstring documents this. This is acceptable, but a warnings.warn() or a check that the caller didn't set both memory_info and non-default device params would be more user-friendly.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a warning.

if device_type != "cpu" or device_id != 0 or vendor_id != -1:
warnings.warn(
"device_type, device_id, and vendor_id are ignored when memory_info is provided.",
stacklevel=2,
)
if isinstance(element_type, int):
return cls(
C.OrtValue.ortvalue_from_shape_and_onnx_type_for_memory_info(
shape,
element_type,
memory_info,
)
)
return cls(
C.OrtValue.ortvalue_from_shape_and_type_for_memory_info(
shape,
element_type,
memory_info,
)
)
Comment thread
ericcraw marked this conversation as resolved.

device = OrtDevice.make(device_type, device_id, vendor_id)._get_c_device()

# Integer for onnx element type (see https://onnx.ai/onnx/api/mapping.html).
Expand Down
49 changes: 49 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_ortvalue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,29 @@ std::unique_ptr<OrtValue> OrtValueFromShapeAndType(const std::vector<int64_t>& s
Tensor::InitOrtValue(element_type, gsl::make_span(shape), std::move(allocator), *ml_value);
return ml_value;
}

// Allocate an OrtValue using the shared allocator matching the given OrtMemoryInfo.
// This allows callers to specify the exact memory type (e.g. HOST_ACCESSIBLE) rather than
// relying on OrtDevice.make() which always uses DEFAULT.
//
// Uses the full OrtMemoryInfo for the lookup (including mem_type) rather than just the OrtDevice,
// because the registered allocator's OrtMemoryInfo has a specific mem_type (e.g. OrtMemTypeCPU
// for HOST_ACCESSIBLE) that must match for FindExistingAllocator to succeed.
std::unique_ptr<OrtValue> OrtValueFromShapeAndTypeWithMemoryInfo(const std::vector<int64_t>& shape,
MLDataType element_type,
const OrtMemoryInfo& memory_info) {
auto& env = GetOrtEnv()->GetEnvironment();
AllocatorPtr allocator = env.GetRegisteredSharedAllocator(memory_info);

if (!allocator) {
throw std::runtime_error("No shared allocator found for: " + memory_info.ToString());
}
Comment thread
ericcraw marked this conversation as resolved.

auto ml_value = std::make_unique<OrtValue>();
Tensor::InitOrtValue(element_type, gsl::make_span(shape), std::move(allocator), *ml_value);
return ml_value;
}

} // namespace

void addOrtValueMethods(pybind11::module& m) {
Expand Down Expand Up @@ -289,6 +312,32 @@ void addOrtValueMethods(pybind11::module& m) {
auto element_type = OnnxTypeToOnnxRuntimeTensorType(onnx_element_type);
return OrtValueFromShapeAndType(shape, element_type, device);
})
// Factory methods to create an OrtValue using an OrtMemoryInfo to select the allocator.
// This enables allocation with a specific memory type (e.g. HOST_ACCESSIBLE) from plugin EPs.
.def_static("ortvalue_from_shape_and_type_for_memory_info", [](const std::vector<int64_t>& shape, py::object& numpy_element_type, const OrtMemoryInfo& memory_info) -> std::unique_ptr<OrtValue> {
PyArray_Descr* dtype;
if (!PyArray_DescrConverter(numpy_element_type.ptr(), &dtype)) {
throw std::runtime_error("Not a valid numpy type");
}

int type_num = dtype->type_num;
Py_DECREF(dtype);

if (!IsNumericNumpyType(type_num)) {
throw std::runtime_error("Creation of OrtValues is currently only supported from non-string numpy arrays");
}

auto element_type = NumpyTypeToOnnxRuntimeTensorType(type_num);
return OrtValueFromShapeAndTypeWithMemoryInfo(shape, element_type, memory_info);
})
.def_static("ortvalue_from_shape_and_onnx_type_for_memory_info", [](const std::vector<int64_t>& shape, int32_t onnx_element_type, const OrtMemoryInfo& memory_info) -> std::unique_ptr<OrtValue> {
if (onnx_element_type == onnx::TensorProto_DataType::TensorProto_DataType_STRING) {
throw std::runtime_error("Creation of OrtValues is currently only supported from non-string numpy arrays");
}

auto element_type = OnnxTypeToOnnxRuntimeTensorType(onnx_element_type);
return OrtValueFromShapeAndTypeWithMemoryInfo(shape, element_type, memory_info);
})

#if !defined(DISABLE_SPARSE_TENSORS)
.def_static("ort_value_from_sparse_tensor", [](const PySparseTensor* py_sparse_tensor) -> std::unique_ptr<OrtValue> {
Expand Down
3 changes: 1 addition & 2 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -278,13 +278,12 @@ py::object GetPyObjFromTensor(const OrtValue& ort_value,
return py::cast<py::object>(result);
}

const auto device_type = device.Type();
// Create a numpy array on top of the OrtValue memory, no copy,
// but only when the tensor owns the buffer. When the tensor wraps external
// memory (e.g. a numpy input array passed through as output), the buffer
// lifetime is not tied to the OrtValue and zero-copy would create a
// dangling pointer. See https://github.com/microsoft/onnxruntime/issues/21922
if (device_type == OrtDevice::CPU) {
if (device.UsesCpuMemory()) {
if (tensor.OwnsBuffer() || zero_copy_non_owning) {
py::array result = PrimitiveTensorToNumpyOverOrtValue(ort_value);
return py::cast<py::object>(result);
Expand Down
28 changes: 16 additions & 12 deletions onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ ExampleEpFactory::ExampleEpFactory(const char* ep_name, ApiPtrs apis, const OrtL
default_logger_{default_logger},
ep_name_{ep_name},
default_memory_info_{nullptr},
readonly_memory_info_{nullptr} {
readonly_memory_info_{nullptr},
host_accessible_memory_info_{nullptr} {
ort_version_supported = ORT_API_VERSION; // set to the ORT version we were compiled with.
GetName = GetNameImpl;
GetVendor = GetVendorImpl;
Expand Down Expand Up @@ -71,12 +72,12 @@ ExampleEpFactory::ExampleEpFactory(const char* ep_name, ApiPtrs apis, const OrtL

// HOST_ACCESSIBLE memory example. use the non-CPU device type so it's clear which device the memory is also
// accessible from. we infer from the type of HOST_ACCESSIBLE that it's CPU accessible.
auto host_accessible_memory_info = Ort::MemoryInfo{"ExampleEP GPU pinned",
OrtMemoryInfoDeviceType_GPU,
/*vendor*/ 0xBE57, /* device_id */ 0,
OrtDeviceMemoryType_HOST_ACCESSIBLE,
/*alignment*/ 0,
OrtAllocatorType::OrtDeviceAllocator};
host_accessible_memory_info_ = Ort::MemoryInfo{"ExampleEP GPU pinned",
OrtMemoryInfoDeviceType_GPU,
/*vendor*/ 0xBE57, /* device_id */ 0,
OrtDeviceMemoryType_HOST_ACCESSIBLE,
/*alignment*/ 0,
OrtAllocatorType::OrtDeviceAllocator};
// Custom Op Domains
custom_op_domains_[0] = Ort::CustomOpDomain{"test"};
custom_op_domains_[1] = Ort::CustomOpDomain{"test2"};
Expand Down Expand Up @@ -156,10 +157,11 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::GetSupportedDevicesImpl(OrtEpFactory*
}

// register the allocator info required by the EP.
// registering OrtMemoryInfo for host accessible memory would be done in an additional call.
// OrtReadOnlyAllocator + OrtDeviceMemoryType_DEFAULT allocator for use with initializers is optional.
// OrtDeviceMemoryType_HOST_ACCESSIBLE is also optional and exposes CPU-accessible memory on the EP device.
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->default_memory_info_));
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->readonly_memory_info_));
RETURN_IF_ERROR(factory->ep_api.EpDevice_AddAllocatorInfo(ep_device, factory->host_accessible_memory_info_));

ep_devices[num_ep_devices++] = ep_device;
}
Expand Down Expand Up @@ -244,8 +246,9 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::CreateAllocatorImpl(OrtEpFactory* this

bool is_default_allocator = memory_info == factory.default_memory_info_;
bool is_readonly_allocator = memory_info == factory.readonly_memory_info_;
bool is_host_accessible_allocator = memory_info == factory.host_accessible_memory_info_;

if (!is_default_allocator && !is_readonly_allocator) {
if (!is_default_allocator && !is_readonly_allocator && !is_host_accessible_allocator) {
return factory.ort_api.CreateStatus(ORT_INVALID_ARGUMENT,
"INTERNAL ERROR! Unknown memory info provided to CreateAllocator. "
"Value did not come directly from an OrtEpDevice returned by this factory.");
Expand All @@ -261,9 +264,10 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::CreateAllocatorImpl(OrtEpFactory* this
// You are of course free to have completely different settings.

// the read-only allocator is used for initializers. we don't need an arena for that.
if (is_readonly_allocator) {
auto read_only_allocator = std::make_unique<CustomAllocator>(memory_info, factory);
*allocator = read_only_allocator.release();
// host-accessible memory is also returned via a plain non-arena allocator.
if (is_readonly_allocator || is_host_accessible_allocator) {
auto simple_allocator = std::make_unique<CustomAllocator>(memory_info, factory);
*allocator = simple_allocator.release();
return nullptr;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class ExampleEpFactory : public OrtEpFactory, public ApiPtrs {
// CPU allocator so we can control the arena behavior. optional as ORT always provides a CPU allocator if needed.
Ort::MemoryInfo default_memory_info_;
Ort::MemoryInfo readonly_memory_info_; // used for initializers
Ort::MemoryInfo host_accessible_memory_info_;

bool arena_allocator_using_default_settings_{true};
std::unique_ptr<ArenaAllocator> arena_allocator_; // shared device allocator that uses an arena
Expand Down
142 changes: 142 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python_autoep.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from collections.abc import Sequence

import numpy as np
import onnx
from autoep_helper import AutoEpTestCase
from helper import get_name

Expand All @@ -23,6 +24,8 @@


class TestAutoEP(AutoEpTestCase):
EXAMPLE_EP_NAME = "example_ep"

def test_cuda_ep_register_and_inference(self):
"""
Test registration of CUDA EP, adding its OrtDevice to the SessionOptions, and running inference.
Expand Down Expand Up @@ -341,6 +344,145 @@ def test_copy_tensors(self):

self.unregister_execution_provider_library(ep_name)

def _register_example_plugin_ep_or_skip(self):
    """Register the example plugin EP and return its OrtEpDevice, or skip the test."""
    if sys.platform != "win32":
        self.skipTest("Skipping test because device discovery is only supported on Windows")

    try:
        library_path = get_name("example_plugin_ep.dll")
    except FileNotFoundError:
        self.skipTest("Skipping test because example_plugin_ep.dll cannot be found")

    self.register_execution_provider_library(self.EXAMPLE_EP_NAME, os.path.realpath(library_path))

    # Scan the discovered devices for the one advertised by the EP we just registered.
    found_device = None
    for candidate in onnxrt.get_ep_devices():
        if candidate.ep_name == self.EXAMPLE_EP_NAME:
            found_device = candidate
            break

    self.assertIsNotNone(found_device, f"Could not find OrtEpDevice for registered EP '{self.EXAMPLE_EP_NAME}'")
    return found_device

def test_ortvalue_from_shape_and_type_host_accessible_numpy_dtype(self):
    """Allocate a HOST_ACCESSIBLE OrtValue via a numpy dtype and read it back as numpy."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)
    self.assertIsNotNone(host_mem)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([3, 2], np.float32, memory_info=host_mem)

    self.assertEqual(value.shape(), [3, 2])
    self.assertEqual(value.data_type(), "tensor(float)")
    # The example EP advertises HOST_ACCESSIBLE on a fake GPU device, so the allocator
    # came from memory_info rather than the default CPU path.
    self.assertNotEqual(value.device_name().lower(), "cpu")

    as_numpy = value.numpy()
    self.assertEqual(as_numpy.shape, (3, 2))
    self.assertEqual(as_numpy.dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_host_accessible_onnx_int_type(self):
    """Allocate a HOST_ACCESSIBLE OrtValue via an ONNX TensorProto element type."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([4], onnx.TensorProto.FLOAT, memory_info=host_mem)

    self.assertEqual(value.shape(), [4])
    self.assertEqual(value.data_type(), "tensor(float)")
    self.assertEqual(value.numpy().dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_host_accessible_zero_copy_numpy_view(self):
    # Writing through view1 must be visible through view2 - if numpy() ever copies,
    # this test fails and the UsesCpuMemory() zero-copy guarantee has regressed.
    ep_device = self._register_example_plugin_ep_or_skip()
    mem_info = ep_device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    ort_value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32, memory_info=mem_info)
    # Hold two simultaneous views so a copy made by either numpy() call is detected
    # directly (the original formulation only kept one view alive at a time).
    view1 = ort_value.numpy()
    view2 = ort_value.numpy()
    view1.fill(7.5)
    np.testing.assert_array_equal(view2, np.full((2, 3), 7.5, dtype=np.float32))

    # Same cross-view check for the plain CPU allocation path.
    cpu_value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32)
    cpu_view1 = cpu_value.numpy()
    cpu_view2 = cpu_value.numpy()
    cpu_view1.fill(-1.25)
    np.testing.assert_array_equal(cpu_view2, np.full((2, 3), -1.25, dtype=np.float32))

    # Release the numpy views before the owning OrtValues so no array outlives its buffer.
    del view1, view2, cpu_view1, cpu_view2
    del ort_value
    del cpu_value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_memory_info_no_allocator(self):
    """A memory_info that matches no registered shared allocator must raise cleanly."""
    # Build an OrtMemoryInfo for a device/vendor combination no registered EP provides.
    bogus_args = (
        "Bogus",
        onnxrt.OrtMemoryInfoDeviceType.GPU,
        0xDEAD,
        0,
        onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE,
        0,
        onnxrt.OrtAllocatorType.ORT_DEVICE_ALLOCATOR,
    )
    bogus_mem_info = onnxrt.OrtMemoryInfo.create_v2(*bogus_args)

    with self.assertRaisesRegex(RuntimeError, "No shared allocator found"):
        onnxrt.OrtValue.ortvalue_from_shape_and_type([2], np.float32, memory_info=bogus_mem_info)

def test_ortvalue_from_shape_and_onnx_type_memory_info_string_rejected(self):
    """String tensors cannot be allocated through the memory_info path."""
    device = self._register_example_plugin_ep_or_skip()
    host_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    with self.assertRaisesRegex(RuntimeError, "non-string numpy arrays"):
        onnxrt.OrtValue.ortvalue_from_shape_and_type([2], onnx.TensorProto.STRING, memory_info=host_mem)

    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_memory_info_overrides_device_args(self):
    ep_device = self._register_example_plugin_ep_or_skip()
    mem_info = ep_device.memory_info(onnxrt.OrtDeviceMemoryType.HOST_ACCESSIBLE)

    # Bogus device args alongside a valid memory_info: if the wrapper ever stops ignoring
    # them, this would fail (unknown device) or silently allocate elsewhere.
    # The wrapper documents that it warns (warnings.warn -> UserWarning) when non-default
    # device args accompany memory_info, so pin that behavior as well.
    with self.assertWarns(UserWarning):
        ort_value = onnxrt.OrtValue.ortvalue_from_shape_and_type(
            [3],
            np.float32,
            device_type="cuda",
            device_id=99,
            vendor_id=0xFFFF,
            memory_info=mem_info,
        )

    ort_value_baseline = onnxrt.OrtValue.ortvalue_from_shape_and_type([3], np.float32, memory_info=mem_info)
    self.assertEqual(ort_value.device_name(), ort_value_baseline.device_name())

    del ort_value
    del ort_value_baseline
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)

def test_ortvalue_from_shape_and_type_default_memory_info(self):
    # Pins the false-branch of UsesCpuMemory(): DEFAULT memory on a non-CPU device must
    # round-trip through data_transfer rather than the zero-copy view path.
    device = self._register_example_plugin_ep_or_skip()
    default_mem = device.memory_info(onnxrt.OrtDeviceMemoryType.DEFAULT)
    self.assertIsNotNone(default_mem)

    value = onnxrt.OrtValue.ortvalue_from_shape_and_type([2, 3], np.float32, memory_info=default_mem)

    self.assertEqual(value.shape(), [2, 3])
    self.assertEqual(value.data_type(), "tensor(float)")
    self.assertNotEqual(value.device_name().lower(), "cpu")

    round_tripped = value.numpy()
    self.assertEqual(round_tripped.shape, (2, 3))
    self.assertEqual(round_tripped.dtype, np.float32)

    del value
    self.unregister_execution_provider_library(self.EXAMPLE_EP_NAME)


if __name__ == "__main__":
unittest.main(verbosity=1)
Loading